diff --git a/CMakeLists.txt b/CMakeLists.txt
index cc6a9dfc..00040ebf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,7 +21,7 @@
 # SOFTWARE.
 
 cmake_minimum_required(VERSION 3.0.2)
-project(NSIMD VERSION 2.2 LANGUAGES CXX)
+project(NSIMD VERSION 3.0 LANGUAGES C CXX)
 
 # -----------------------------------------------------------------------------
 # First check that NSIMD code has been generated
@@ -62,8 +62,11 @@ function(nsimd_get_compiler_argument simd_ext argument)
     set(mapping_sve512 "/DSVE512")
     set(mapping_sve1024 "/DSVE1024")
     set(mapping_sve2048 "/DSVE2048")
+    set(mapping_vmx "/DVMX")
+    set(mapping_vsx "/DVSX")
     set(mapping_cuda "/DCUDA")
     set(mapping_rocm "/DROCM")
+    set(mapping_oneapi "/DONEAPI")
   else()
     set(mapping_sse2 "-DSSE2;-msse2" )
     set(mapping_sse42 "-DSSE42;-msse4.2" )
@@ -89,8 +92,11 @@ function(nsimd_get_compiler_argument simd_ext argument)
                         ";-msve-vector-bits=1024")
     set(mapping_sve2048 "-DSVE2048 -march=armv8.2-a+sve"
                         ";-msve-vector-bits=2048")
+    set(mapping_vmx "-DVMX;-mcpu=powerpc64le;-maltivec")
+    set(mapping_vsx "-DVSX;-mcpu=powerpc64le;-mvsx")
     set(mapping_cuda "-DCUDA")
     set(mapping_rocm "-DROCM")
+    set(mapping_oneapi "-DONEAPI")
   endif()
   if (DEFINED mapping_${simd_ext})
     set(${argument} "${mapping_${simd_ext}}" PARENT_SCOPE)
@@ -111,38 +117,75 @@ nsimd_get_compiler_argument(${simd} NSIMD_COMPILATION_OPTIONS)
 # -----------------------------------------------------------------------------
 # Object file selection
 
-set(NSIMD_OBJS "fp16;memory;ulps;api_cpu")
+set(NSIMD_OBJS "fp16;gpu;memory;api_cpu;rempitab;sleefsp;sleefdp")
 if ("${simd}" STREQUAL "sse2")
-  set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2")
+  set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;sleef_sse2_f32;sleef_sse2_f64")
 elseif ("${simd}" STREQUAL "sse42")
-  set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42")
+  set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;"
+                 "sleef_sse2_f32;sleef_sse2_f64;"
+                 "sleef_sse42_f32;sleef_sse42_f64")
 elseif ("${simd}" STREQUAL "avx")
-  set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx")
+  set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;"
+                 "sleef_sse2_f32;sleef_sse2_f64;"
+                 "sleef_sse42_f32;sleef_sse42_f64;"
+                 "sleef_avx_f32;sleef_avx_f64")
 elseif ("${simd}" STREQUAL "avx2")
-  set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2")
+  set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2;"
+                 "sleef_sse2_f32;sleef_sse2_f64;"
+                 "sleef_sse42_f32;sleef_sse42_f64;"
+                 "sleef_avx_f32;sleef_avx_f64;"
+                 "sleef_avx2_f32;sleef_avx2_f64")
 elseif ("${simd}" STREQUAL "avx512_knl")
   set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2"
-                 ";api_avx512_knl")
+                 "sleef_sse2_f32;sleef_sse2_f64;"
+                 "sleef_sse42_f32;sleef_sse42_f64;"
+                 "sleef_avx_f32;sleef_avx_f64;"
+                 "sleef_avx2_f32;sleef_avx2_f64;"
+                 "api_avx512_knl;sleef_avx512_knl_f32;sleef_avx512_knl_f64")
 elseif ("${simd}" STREQUAL "avx512_skylake")
-  set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2"
-                 ";api_avx512_skylake")
+  set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2;"
+                 "api_avx512_skylake;sleef_avx512_skylake_f32;"
+                 "sleef_sse2_f32;sleef_sse2_f64;"
+                 "sleef_sse42_f32;sleef_sse42_f64;"
+                 "sleef_avx_f32;sleef_avx_f64;"
+                 "sleef_avx2_f32;sleef_avx2_f64;"
+                 "sleef_avx512_skylake_f64")
 elseif ("${simd}" STREQUAL "neon128")
-  set(NSIMD_OBJS "${NSIMD_OBJS};api_neon128")
+  set(NSIMD_OBJS "${NSIMD_OBJS};api_neon128;"
+                 "sleef_neon128_f32;sleef_neon128_f64")
 elseif ("${simd}" STREQUAL "aarch64")
-  set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64")
"${NSIMD_OBJS};api_aarch64;" + "sleef_aarch64_f32;sleef_aarch64_f64") elseif ("${simd}" STREQUAL "sve") - set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve") + set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve;" + "sleef_aarch64_f32;sleef_aarch64_f64;" + "sleef_sve_f32;sleef_sve_f64") elseif ("${simd}" STREQUAL "sve128") - set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve128") + set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve128;" + "sleef_aarch64_f32;sleef_aarch64_f64;" + "sleef_sve128_f32;sleef_sve128_f64") elseif ("${simd}" STREQUAL "sve256") - set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve256") + set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve256;" + "sleef_aarch64_f32;sleef_aarch64_f64;" + "sleef_sve256_f32;sleef_sve256_f64") elseif ("${simd}" STREQUAL "sve512") - set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve512") + set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve512;" + "sleef_aarch64_f32;sleef_aarch64_f64;" + "sleef_sve512_f32;sleef_sve512_f64") elseif ("${simd}" STREQUAL "sve1024") - set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve1024") + set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve1024;" + "sleef_aarch64_f32;sleef_aarch64_f64;" + "sleef_sve1024_f32;sleef_sve1024_f64") elseif ("${simd}" STREQUAL "sve2048") - set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve2048") + set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve2048;" + "sleef_aarch64_f32;sleef_aarch64_f64;" + "sleef_sve2048_f32;sleef_sve2048_f64") +elseif ("${simd}" STREQUAL "vmx") + set(NSIMD_OBJS "${NSIMD_OBJS};api_vmx;sleef_vmx_f32;sleef_vmx_f64") +elseif ("${simd}" STREQUAL "vsx") + set(NSIMD_OBJS "${NSIMD_OBJS};api_vmx;api_vsx;sleef_vmx_f32;sleef_vmx_f64;" + "sleef_vsx_f32;sleef_vmx_f64") endif() # ----------------------------------------------------------------------------- @@ -150,12 +193,33 @@ endif() set(NSIMD_LIB_DEPS "") foreach(o ${NSIMD_OBJS}) - add_library(${o} OBJECT src/${o}.cpp) + if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/${o}.cpp") + add_library(${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/src/${o}.cpp") + elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/${o}.c") + add_library(${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/src/${o}.c") + elseif(("${o}" STREQUAL "sleef_neon128_f64") OR + ("${o}" STREQUAL "sleef_vmx_f64")) + add_library(${o} OBJECT + "${CMAKE_CURRENT_SOURCE_DIR}/src/sleefsimddp_emulation.c") + elseif("${o}" STREQUAL "sleef_vmx_f32") + add_library(${o} OBJECT + "${CMAKE_CURRENT_SOURCE_DIR}/src/sleefsimdsp_emulation.c") + elseif(o MATCHES "sleef_.*_f32") + add_library(${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/src/sleefsimdsp.c") + elseif(o MATCHES "sleef_.*_f64") + add_library(${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/src/sleefsimddp.c") + endif() + if (MSVC) + set(sleef_cflags "/DNDEBUG;/DDORENAME=1") + else() + set(sleef_cflags "-DNDEBUG;-DDORENAME=1") + endif() set_property(TARGET ${o} PROPERTY POSITION_INDEPENDENT_CODE ON) target_include_directories(${o} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") if (MSVC) target_compile_definitions(${o} PUBLIC "/D_CRT_SECURE_NO_WARNINGS") endif() + set(buf "") if ("${o}" STREQUAL "api_sse2") nsimd_get_compiler_argument("sse2" buf) elseif ("${o}" STREQUAL "api_sse42") @@ -184,15 +248,75 @@ foreach(o ${NSIMD_OBJS}) nsimd_get_compiler_argument("sve1024" buf) elseif ("${o}" STREQUAL "api_sve2048") nsimd_get_compiler_argument("sve2048" buf) + elseif ("${o}" STREQUAL "api_vmx") + nsimd_get_compiler_argument("vmx" buf) + elseif ("${o}" STREQUAL "api_vsx") + nsimd_get_compiler_argument("vsx" buf) elseif ("${o}" STREQUAL "api_cuda") 
     nsimd_get_compiler_argument("cuda" buf)
   elseif ("${o}" STREQUAL "api_rocm")
     nsimd_get_compiler_argument("rocm" buf)
   elseif ("${o}" STREQUAL "api_cpu")
     nsimd_get_compiler_argument("cpu" buf)
+  elseif ("${o}" STREQUAL "rempitab")
+    list(APPEND buf "${sleef_cflags}")
+  elseif ("${o}" STREQUAL "sleefsp")
+    list(APPEND buf "${sleef_cflags}")
+  elseif ("${o}" STREQUAL "sleefdp")
+    list(APPEND buf "${sleef_cflags}")
+  elseif ("${o}" MATCHES "sleef_sse2_")
+    nsimd_get_compiler_argument("sse2" buf)
+    list(APPEND buf "-DNSIMD_SSE2;-DENABLE_SSE2=1;${sleef_cflags}")
+  elseif ("${o}" MATCHES "sleef_sse42_")
+    nsimd_get_compiler_argument("sse42" buf)
+    list(APPEND buf "-DNSIMD_SSE42;-DENABLE_SSE4=1;${sleef_cflags}")
+  elseif ("${o}" MATCHES "sleef_avx_")
+    nsimd_get_compiler_argument("avx" buf)
+    list(APPEND buf "-DNSIMD_AVX;-DENABLE_AVX=1;${sleef_cflags}")
+  elseif ("${o}" MATCHES "sleef_avx2_")
+    nsimd_get_compiler_argument("avx2" buf)
+    list(APPEND buf "-DNSIMD_AVX2;-DENABLE_AVX2=1;${sleef_cflags}")
+  elseif ("${o}" MATCHES "sleef_avx512_knl_")
+    nsimd_get_compiler_argument("avx512_knl" buf)
+    list(APPEND buf "-DNSIMD_AVX512_KNL;-DENABLE_AVX512F=1;${sleef_cflags}")
+  elseif ("${o}" MATCHES "sleef_avx512_skylake_")
+    nsimd_get_compiler_argument("avx512_skylake" buf)
+    list(APPEND buf
+         "-DNSIMD_AVX512_SKYLAKE;-DENABLE_AVX512F=1;${sleef_cflags}")
+  elseif ("${o}" MATCHES "sleef_neon128_")
+    nsimd_get_compiler_argument("neon128" buf)
+    list(APPEND buf "-DNSIMD_NEON128;-DENABLE_NEON32=1;${sleef_cflags}")
+  elseif ("${o}" MATCHES "sleef_aarch64_")
+    nsimd_get_compiler_argument("aarch64" buf)
+    list(APPEND buf "-DNSIMD_AARCH64;-DENABLE_ADVSIMD=1;${sleef_cflags}")
+  elseif ("${o}" MATCHES "sleef_sve_")
+    nsimd_get_compiler_argument("sve" buf)
+    list(APPEND buf "-DNSIMD_SVE;-DENABLE_SVE=1;${sleef_cflags}")
+  elseif ("${o}" MATCHES "sleef_sve128_")
+    nsimd_get_compiler_argument("sve128" buf)
+    list(APPEND buf "-DNSIMD_SVE128;-DENABLE_SVE=1;${sleef_cflags}")
+  elseif ("${o}" MATCHES "sleef_sve256_")
+    nsimd_get_compiler_argument("sve256" buf)
+    list(APPEND buf "-DNSIMD_SVE256;-DENABLE_SVE=1;${sleef_cflags}")
+  elseif ("${o}" MATCHES "sleef_sve512_")
+    nsimd_get_compiler_argument("sve512" buf)
+    list(APPEND buf "-DNSIMD_SVE512;-DENABLE_SVE=1;${sleef_cflags}")
+  elseif ("${o}" MATCHES "sleef_sve1024_")
+    nsimd_get_compiler_argument("sve1024" buf)
+    list(APPEND buf "-DNSIMD_SVE1024;-DENABLE_SVE=1;${sleef_cflags}")
+  elseif ("${o}" MATCHES "sleef_sve2048_")
+    nsimd_get_compiler_argument("sve2048" buf)
+    list(APPEND buf "-DNSIMD_SVE2048;-DENABLE_SVE=1;${sleef_cflags}")
+  elseif ("${o}" MATCHES "sleef_vmx_")
+    nsimd_get_compiler_argument("vmx" buf)
+    list(APPEND buf "-DNSIMD_VMX;-DENABLE_VSX=1;${sleef_cflags}")
+  elseif ("${o}" MATCHES "sleef_vsx_")
+    nsimd_get_compiler_argument("vsx" buf)
+    list(APPEND buf "-DNSIMD_VSX;-DENABLE_VSX=1;${sleef_cflags}")
   else()
     set(buf "")
   endif()
   if (NOT "${buf}" STREQUAL "")
     target_compile_options(${o} PUBLIC "${buf}")
   endif()
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 7bd491fd..b3eb7448 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -64,10 +64,15 @@ maintainer will then merge or comment the pull request.
   + `SVE` 512 bits known at compile time called `SVE512` in source code.
   + `SVE` 1024 bits known at compile time called `SVE1024` in source code.
   + `SVE` 2048 bits known at compile time called `SVE2048` in source code.
+- IBM POWERPC
+  + `VMX` 128 bits as found on POWER6 CPUs called `VMX` in source code.
+  + `VSX` 128 bits as found on POWER7/8 CPUs called `VSX` in source code.
 - NVIDIA
   + `CUDA` called `CUDA` in source code
 - AMD
   + `ROCm` called `ROCM` in source code
+- Intel oneAPI
+  + `oneAPI` called `ONEAPI` in source code
 
 `nsimd` currently supports the following types:
 - `i8`: signed integers over 8 bits (usually `signed char`),
@@ -126,10 +131,15 @@ follows:
     `float16`s, `float`s and `double`s.
   + `SVE2048`: `svfoo_f16`, `svfoo_f32` and `svfoo_f64` for respectively
     `float16`s, `float`s and `double`s.
+- IBM POWERPC
+  + `VMX`: `vec_foo` for `float`s and no intrinsics for `double`s.
+  + `VSX`: `vec_foo` for `float`s and `double`s.
 - NVIDIA
   + `CUDA`: no intrinsics are provided.
 - AMD
   + `ROCM`: no intrinsics are provided.
+- Intel oneAPI
+  + `ONEAPI`: no intrinsics are provided.
 
 The first thing to do is to declare this new intrinsic to the generation
 system. A lot of work is done by the generation system such as generating all
 functions
@@ -253,7 +263,7 @@ Now that the operator is registered, all signatures will be generated but the
 implementations will be missing. Type
 
 ```sh
-python3 egg/hatch.py -Af
+python3 egg/hatch.py -lf
 ```
 
 and the following files (among many others) should appear:
 
 - `include/nsimd/arm/sve512/foo.h`
 - `include/nsimd/arm/sve1024/foo.h`
 - `include/nsimd/arm/sve2048/foo.h`
+- `include/nsimd/ppc/vmx/foo.h`
+- `include/nsimd/ppc/vsx/foo.h`
 
 They each correspond to the implementations of the operator for each supported
 architecture. When opening one of these files the implementations in plain
@@ -287,9 +299,11 @@ files:
 - `egg/platform_cpu.py`
 - `egg/platform_x86.py`
 - `egg/platform_arm.py`
+- `egg/platform_ppc.py`
 - `egg/scalar.py`
 - `egg/cuda.py`
 - `egg/hip.py`
+- `egg/oneapi.py`
 
 The idea is to produce plain C (not C++) code using Python string formatting.
 Each of the Python files provides some helper functions to ease as much as
@@ -334,6 +348,7 @@ Python dictionary of the `get_impl` function:
 
 Then, above in the file we write the Python function `foo1` that will provide
 the C implementation of operator `foo`:
+
 ```python
 def foo1(typ):
     return func_body(
@@ -459,6 +474,28 @@ Here are some notes concerning the ARM implementation:
 4. Do not forget to add the `foo` entry to the `impls` dictionary in the
    `get_impl` Python function.
 
+### For IBM POWERPC
+
+```python
+def foo1(simd_ext, typ):
+    if has_to_be_emulated(simd_ext, typ):
+        return emulation_code(op, simd_ext, typ, ['v', 'v'])
+    else:
+        return 'return vec_foo({in0});'.format(**fmtspec)
+```
+
+Here are some notes concerning the PPC implementation:
+
+1. For VMX, intrinsics on `double`s almost never exist.
+2. The Python helper function `has_to_be_emulated` returns `True` when the
+   implementation of `foo` concerns `float16`s or `double`s for `VMX`. When
+   this function returns `True` you can then use `emulation_code`.
+3. The `emulation_code` function returns a generic implementation of an
+   operator. However, this implementation is not suitable for every operator
+   and the programmer has to take care of that.
+4. Do not forget to add the `foo` entry to the `impls` dictionary in the
+   `get_impl` Python function.
+
 ### The scalar CPU version
 
 ```python
@@ -485,10 +522,10 @@ the float32 result to a float16.
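+To make this convert-compute-convert pattern concrete, here is a minimal
+sketch (not the actual `egg/scalar.py` code) of what the scalar generator
+could emit for `foo`. `nsimd_f16_to_f32` and `nsimd_f32_to_f16` are NSIMD's
+float16 conversion helpers, while `nsimd_scalar_foo_f32` is a hypothetical
+name for the float32 scalar implementation; `fmtspec` is the usual format
+dictionary.
+
+```python
+# Sketch only: emit the scalar C body of foo for one type. C has no
+# native float16 scalar type, so for f16 we convert the input to f32,
+# compute in f32, then convert the result back to f16.
+def foo1(typ):
+    if typ == 'f16':
+        return 'return nsimd_f32_to_f16(nsimd_scalar_foo_f32(' \
+               'nsimd_f16_to_f32({in0})));'.format(**fmtspec)
+    return 'return nsimd_scalar_foo_{}({{in0}});'. \
+           format(typ).format(**fmtspec)
+```
+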
 ### The GPU versions
 
-The GPU generator Python files `cuda.py` and `rocm.py` are a bit different
-from the other files but it is easy to find where to add the relevant
-pieces of code as ROCm syntax is fully compatible with CUDA's one only needs
-to modify the `cuda.py` file.
+The GPU generator Python files `cuda.py`, `rocm.py` and `oneapi.py` are a bit
+different from the other files but it is easy to find where to add the
+relevant pieces of code. Note that since ROCm syntax is fully compatible with
+CUDA's, one only needs to modify the `cuda.py` file, while `oneapi.py` is
+easy to understand on its own.
 
 The code to add for float32s is as follows, to be added inside the `get_impl`
 Python function.
 
@@ -497,8 +534,8 @@ Python function.
 return '1 / (1 - {in0}) + 1 / ((1 - {in0}) * (1 - {in0}))'.format(**fmtspec)
 ```
 
-The code to add for float16's is as follows to be added inside the
-`get_impl_f16` Python function.
+The code for CUDA and ROCm to add for float16s is as follows. It has to be
+added inside the `get_impl_f16` Python function.
 
 ```python
 arch53_code = '''__half one = __float2half(1.0f);
@@ -511,6 +548,13 @@ arch53_code = '''__half one = __float2half(1.0f);
                  );'''.format(**fmtspec)
 ```
 
+As Intel oneAPI natively supports float16s, the code is the same as the one
+for floats:
+
+```python
+return '1 / (1 - {in0}) + 1 / ((1 - {in0}) * (1 - {in0}))'.format(**fmtspec)
+```
+
 ### Implementing the test for the operator
 
 Now that we have written the implementations for the `foo` operator we must
@@ -553,6 +597,59 @@ overview of Python functions present in the `egg/gen_test.py` file:
   some computations. This is the kind of test that can handle our `foo`
   operator and therefore nothing has to be done on our part.
 
+## Not all tests are to be done
+
+Doing all tests is not recommended. Take for example the `cvt` operator.
+Testing `cvt` from say `f32` to `i32` is complicated as the result depends on
+how NaNs and infinities are handled and on the current rounding mode. In turn
+these parameters depend on the vendor, the chip, bugs in the chip, the
+rounding mode chosen by users or by other software...
+
+The function `should_i_do_the_test` gives a hint on whether to implement the
+test or not. Its code is really simple and you may need to modify it. The
+listing below is a possible implementation that takes care of the case
+described in the previous paragraph.
+
+```python
+def should_i_do_the_test(operator, tt='', t=''):
+    if operator.name == 'cvt' and t in common.ftypes and tt in common.iutypes:
+        # When converting from float to int to float we may not get the
+        # initial value back because of rounding. As tests are usually done
+        # by going back and forth, both directions get tested in the end.
+        return False
+    if operator.name == 'reinterpret' and t in common.iutypes and \
+       tt in common.ftypes:
+        # When reinterpreting from int to float we may get NaNs or infinities
+        # and no one knows what this will give when going back to ints,
+        # especially when float16s are emulated. Again, as tests are done by
+        # going back and forth, both directions get tested in the end.
+        return False
+    if operator.name in ['notb', 'andb', 'andnotb', 'xorb', 'orb'] and \
+       t == 'f16':
+        # Bit operations on float16 are hard to check because they are
+        # emulated in most cases. Going back and forth with reinterprets to
+        # do bitwise operations makes the bit in the last place wrong. This
+        # is expected but makes testing really hard. So for
+        # now we do not test them on float16.
+        return False
+    if operator.name in ['len', 'set1', 'set1l', 'mask_for_loop_tail',
+                         'loadu', 'loada', 'storeu', 'storea', 'loadla',
+                         'loadlu', 'storela', 'storelu', 'if_else1']:
+        # These functions are used in almost every test so we consider
+        # that they are extensively tested.
+        return False
+    if operator.name in ['store2a', 'store2u', 'store3a', 'store3u',
+                         'store4a', 'store4u', 'scatter', 'scatter_linear',
+                         'downcvt', 'to_logical']:
+        # These functions are tested along with their load counterparts.
+        # downcvt is tested along with upcvt and to_logical is tested with
+        # to_mask.
+        return False
+    return True
+```
+
 ### Conclusion
 
 At first sight the implementation of `foo` seems complicated because intrinsics
@@ -637,3 +734,73 @@ functions:
 ```
 
 Tests for the module have to be put into the `nsimd/tests/mymod` directory.
+
+## How do I add a new platform?
+
+The list of supported platforms is determined by looking in the `egg`
+directory and listing all `platform_*.py` files. Each file must contain all
+SIMD extensions for a given platform. For example the default (no SIMD) is
+given by `platform_cpu.py`. All the Intel SIMD extensions are given by
+`platform_x86.py`.
+
+Each Python file that implements a platform must be named
+`platform_[name for platform].py` and must export at least the following
+functions (a minimal skeleton is sketched after the list):
+
+- `def get_simd_exts()`
+  Returns the list of SIMD extensions implemented by this file as a Python
+  list.
+
+- `def get_prev_simd_ext(simd_ext)`
+  Usually SIMD extensions are added over time by vendors and a chip
+  implementing a SIMD extension supports the previous SIMD extensions. This
+  function must return the previous SIMD extension supported by the vendor if
+  it exists, otherwise it must return the empty string. Note that `cpu` is
+  the only SIMD extension that has no previous SIMD extension. Every other
+  SIMD extension has at least `cpu` as its previous SIMD extension.
+
+- `def get_native_typ(simd_ext, typ)`
+  Returns the native SIMD type corresponding to the SIMD extension `simd_ext`
+  whose elements are of type `typ`. If `typ` or `simd_ext` is not known then
+  a ValueError exception must be raised.
+
+- `def get_type(simd_ext, typ)`
+  Returns the "intrinsic" SIMD type corresponding to the given
+  arithmetic type. If `typ` or `simd_ext` is not known then a ValueError
+  exception must be raised.
+
+- `def get_additional_include(func, simd_ext, typ)`
+  Returns any additional include needed for the implementation of `func` for
+  the given `simd_ext` and `typ`.
+
+- `def get_logical_type(simd_ext, typ)`
+  Returns the "intrinsic" logical SIMD type corresponding to the given
+  arithmetic type. If `typ` or `simd_ext` is not known then a ValueError
+  exception must be raised.
+
+- `def get_nb_registers(simd_ext)`
+  Returns the number of registers for this SIMD extension.
+
+- `def get_impl(func, simd_ext, from_typ, to_typ)`
+  Returns the implementation (C code) for `func` on type `typ` for `simd_ext`.
+  If `typ` or `simd_ext` is not known then a ValueError exception must be
+  raised. Any `func` given satisfies `S func(T a0, T a1, ... T an)`.
+
+- `def has_compatible_SoA_types(simd_ext)`
+  Returns True iff the given `simd_ext` has structure of arrays types
+  compatible with NSIMD i.e. whose members are v1, v2, ... Returns False
+  otherwise. If `simd_ext` is not known then a ValueError exception must be
+  raised.
+
+- `def get_SoA_type(simd_ext, typ, deg)`
+  Returns the structure of arrays types for the given `typ`, `simd_ext` and
+  `deg`. If `simd_ext` is not known or does not name a type whose
+  corresponding SoA types are compatible with NSIMD then a ValueError
+  exception must be raised.
+
+- `def emulate_fp16(simd_ext)`
+  Returns True iff the given SIMD extension has to emulate FP16's with
+  two FP32's.
+
+Then you are free to implement the SIMD extensions for the platform. See
+above on how to add the implementations of operators.
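+
+To make the expected interface concrete, here is a minimal, hypothetical
+skeleton of such a file, say `egg/platform_mine.py`, implementing a single
+made-up SIMD extension called `mysimd`. Every returned string is a
+placeholder; a real platform file returns the platform's actual types and C
+implementations.
+
+```python
+# Hypothetical egg/platform_mine.py: minimal platform file for one
+# made-up SIMD extension called 'mysimd'.
+
+def get_simd_exts():
+    # SIMD extensions implemented by this file.
+    return ['mysimd']
+
+def get_prev_simd_ext(simd_ext):
+    if simd_ext != 'mysimd':
+        raise ValueError('Unknown SIMD extension: {}'.format(simd_ext))
+    # Every SIMD extension other than cpu has at least cpu before it.
+    return 'cpu'
+
+def get_native_typ(simd_ext, typ):
+    if simd_ext != 'mysimd' or typ not in ['f32', 'f64']:
+        raise ValueError('Unknown SIMD extension or type')
+    return 'my_native_vector_{}'.format(typ)  # placeholder C type name
+
+def get_type(simd_ext, typ):
+    # "Intrinsic" SIMD type for the given arithmetic type.
+    return get_native_typ(simd_ext, typ)
+
+def get_logical_type(simd_ext, typ):
+    # Logical (mask) type; this sketch simply reuses the arithmetic type.
+    return get_native_typ(simd_ext, typ)
+
+def get_additional_include(func, simd_ext, typ):
+    # No extra headers are needed in this sketch.
+    return ''
+
+def get_nb_registers(simd_ext):
+    return '32'  # placeholder register count
+
+def get_impl(func, simd_ext, from_typ, to_typ):
+    # A real platform file dispatches on func and returns C code here.
+    raise ValueError('Operator {} not implemented for mysimd'.format(func))
+
+def has_compatible_SoA_types(simd_ext):
+    if simd_ext != 'mysimd':
+        raise ValueError('Unknown SIMD extension: {}'.format(simd_ext))
+    return False
+
+def get_SoA_type(simd_ext, typ, deg):
+    raise ValueError('mysimd has no NSIMD-compatible SoA types')
+
+def emulate_fp16(simd_ext):
+    # In this sketch float16 is emulated with two float32 vectors.
+    return True
+```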
diff --git a/README.md b/README.md
index 9af1446f..4c1ac4a3 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
-Documentation can be found [here](https://agenium-scale.github.io/nsimd/).
+Documentation can be found [here](https://agenium-scale.github.io/nsimd/).
+We put a lot of effort into [testing](how_tests_are_done.md).
 
 # What is NSIMD?
 
@@ -21,7 +22,8 @@ With two of its modules NSIMD provides three programming paradigms:
 
 | Architecture                           | NSIMD core | TET1D module | SPMD module |
 |:---------------------------------------|:----------:|:------------:|:-----------:|
-| CPU (SIMD emulation)                   | Y          | Y            | Y           |
+| CPU (scalar functions)                 | Y          | Y            | Y           |
+| CPU (128-bits SIMD emulation)          | Y          | Y            | Y           |
 | Intel SSE 2                            | Y          | Y            | Y           |
 | Intel SSE 4.2                          | Y          | Y            | Y           |
 | Intel AVX                              | Y          | Y            | Y           |
@@ -32,8 +34,27 @@ With two of its modules NSIMD provides three programming paradigms:
 | Arm NEON 128 bits (ARMv8 and later)    | Y          | Y            | Y           |
 | Arm SVE (original sizeless SVE)        | Y          | Y            | Y           |
 | Arm fixed sized SVE                    | Y          | Y            | Y           |
+| IBM POWERPC VMX                        | Y          | Y            | Y           |
+| IBM POWERPC VSX                        | Y          | Y            | Y           |
 | NVIDIA CUDA                            | N          | Y            | Y           |
 | AMD ROCm                               | N          | Y            | Y           |
+| Intel oneAPI                           | N          | Y            | Y           |
+
+## Contributions
+
+| Contributor          | Contribution(s)                                    |
+|:---------------------|:---------------------------------------------------|
+| Guillaume Quintin    | Maintainer + main contributor                      |
+| Alan Kelly           | Arm NEON + mathematical functions                  |
+| Kenny Péou           | Fixed point module                                 |
+| Xavier Berault       | PowerPC VMX and VSX                                |
+| Vianney Stricher     | NSIMD core + oneAPI in SPMD and TET1D modules      |
+| Quentin Khan         | SoA/AoS loads and stores                           |
+| Paul Gannay          | PowerPC VMX, VSX + testing system                  |
+| Charly Chevalier     | Benchmarking system + Python internals             |
+| Erik Schnetter       | Fixes + code generation                            |
+| Lénaïc Bagnères      | Fixes + TET1D module                               |
+| Jean-Didier Pailleux | Shuffle operators                                  |
 
 ## How it works?
 
@@ -47,14 +68,15 @@ such as addition, multiplication, square root, etc, are all present in header
 files whereas big functions such as I/O are put in source files that are
 compiled as a `.so`/`.dll` library.
 
-NSIMD provides C89, C++98, C++11, C++14 and C++20 APIs. All APIs allow writing
-generic code. For the C API this is achieved through a thin layer of macros;
-for the C++ APIs it is achieved using templates and function overloading. The
-C++ APIs are split into two. The first part is a C-like API with only function
-calls and direct type definitions for SIMD types while the second one provides
-operator overloading, higher level type definitions that allows unrolling.
-C++11, C++14 APIs add for instance templated type definitions and templated
-constants while the C++20 API uses concepts for better error reporting.
+NSIMD provides C89, C11, C++98, C++11, C++14 and C++20 APIs. All APIs allow
+writing generic code. For the C API this is achieved through a thin layer of
+macros and with the `_Generic` keyword for the advanced C API; for the C++
+APIs it is achieved using templates and function overloading. The C++ APIs are split
+into two. The first part is a C-like API with only function calls and direct
+type definitions for SIMD types while the second one provides operator
+overloading and higher level type definitions that allow unrolling. The
+C++11 and C++14 APIs add for instance templated type definitions and
+templated constants while the C++20 API uses concepts for better error
+reporting.
 
 Binary compatibility is guaranteed by the fact that only a C ABI is exposed.
 The C++ API only wraps the C calls.
@@ -83,7 +105,7 @@ make install
 
 where `SIMD_EXT` is one of the following: CPU, SSE2, SSE42, AVX, AVX2,
 AVX512\_KNL, AVX512\_SKYLAKE, NEON128, AARCH64, SVE, SVE128, SVE256, SVE512,
-SVE1024, SVE2048, CUDA, ROCM.
+SVE1024, SVE2048, VMX, VSX, CUDA, ROCM.
 
 Note that when compiling for NEON128 on Linux one has to choose the ABI,
 either armel or armhf. Default is armel. As CMake is unable to autodetect this
@@ -137,15 +159,6 @@ The Python code can call `clang-format` to properly format all generated C/C++
 source. On Linux you can install it via your package manager. On Windows you
 can use the official binary at .
 
-Testing the library requires the MPFR library that can be found at
-.
-
-Benchmarking the library requires Google Benchmark version 1.3 that can be
-found at  plus all the other SIMD
-libraries used for comparison:
-- MIPP ()
-- Sleef ()
-
 Compiling the library requires a C++98 compiler. Any version of GCC, Clang or
 MSVC will do. Note that the produced library and header files for the end-user
 are C89, C++98, C++11 compatible. Note that C/C++ files are generated by a
@@ -175,6 +188,8 @@ will contain the library. Supported SIMD extensions are:
 - sve512
 - sve1024
 - sve2048
+- vmx
+- vsx
 - cuda
 - rocm
 
@@ -184,6 +199,9 @@ Supported compilers are:
 - clang
 - icc
 - armclang
+- xlc
+- dpcpp
+- fcc
 - cl
 - nvcc
 - hipcc
@@ -256,8 +274,16 @@ Configure project for compilation.
       msvc      Microsoft C and C++ compiler
       llvm      The LLVM compiler infrastructure
       armclang  Arm suite of compilers based on LLVM
+      xlc       IBM suite of compilers
+      fcc_trad_mode
+                Fujitsu compiler in traditional mode
+      fcc_clang_mode
+                Fujitsu compiler in clang mode
+      emscripten
+                Emscripten suite for compiling into JS
      icc       Intel C and C++ compiler
      rocm      Radeon Open Compute compilers
+      oneapi    Intel oneAPI compilers
      cuda, cuda+gcc, cuda+clang, cuda+msvc
                Nvidia CUDA C++ compiler
 
 -comp=COMMAND,COMPILER[,PATH[,VERSION[,ARCHI]]]
                   compiling and/or setting the CUDA host compiler. COMMAND
                   must be in { cc, c++, gcc, g++, cl, icc, nvcc, hipcc,
                   hcc, clang, clang++, armclang, armclang++,
-                  cuda-host-c++ } ;
+                  cuda-host-c++, emcc, em++ } ;
                   VERSION is compiler dependent. Note that VERSION can be
                   set to only major number(s) in which case nsconfig fills
                   missing numbers with zeros.
@@ -277,15 +303,26 @@ Configure project for compilation.
      armel     ARMv5 and ARMv6 32-bits ISA
      armhf     ARMv7 32-bits ISA
      aarch64   ARM 64-bits ISA
+      ppc64el   PowerPC 64-bits little endian
+      wasm32    WebAssembly with 32-bits memory indexing
+      wasm64    WebAssembly with 64-bits memory indexing
    Supported COMPILER:
      gcc, g++              GNU Compiler Collection
      clang, clang++        LLVM Compiler Infrastructure
+      emcc, em++            Emscripten compilers
      msvc, cl              Microsoft Visual C++
      armclang, armclang++  ARM Compiler
+      xlc, xlc++            IBM Compiler
      icc                   Intel C/C++ Compiler
      dpcpp                 Intel DPC++ Compiler
      nvcc                  Nvidia CUDA compiler
      hipcc                 ROCm HIP compiler
+      fcc_trad_mode, FCC_trad_mode
+                            Fujitsu C and C++ compiler in traditional mode
+      fcc_clang_mode, FCC_clang_mode
+                            Fujitsu C and C++ compiler in clang mode
 
 -prefix=PREFIX     Set path for installation to PREFIX
 -h, --help         Print the current help
@@ -296,7 +333,7 @@ NOTE: Nvidia CUDA compiler (nvcc) needs a host compiler. Usually on
       commands with 'cuda-host-c++'. The latter defaults to GCC on Linux
       systems and MSVC on Windows systems. The user can of course choose a
       specific version and path of this host compiler via the
-      '-comp=cuda-hostc++,... parameters. If nvcc is not chosen as the
+      '-comp=cuda-host-c++,... parameters. If nvcc is not chosen as the
       default C++ compiler but is used for compilation then its default
       C++ host compiler is 'c++'. The latter can also be customized via the
       '-comp=c++,...' command line switch.
@@ -308,15 +345,12 @@ the ninja file or Makefile.
 
 ```bash
 $ ../nstools/bin/nsconfig .. -list-vars
 Project variables list:
-name               | description
--------------------|---------------------------------------------------------
-simd               | SIMD extension to use
-cuda_arch_flags    | CUDA target arch flag(s) for tests
-mpfr               | MPFR compilation flags (for tests only)
-sleef              | Sleef compilation flags (for benchmarks only)
-benchmark          | Google benchmark compilation flags (for benchmarks only)
-build_library_only | Turn off tests/bench/ulps
-static_libstdcpp   | Compile the libstdc++ statically
+name             | description
+-----------------|-----------------------------------
+simd             | SIMD extension to use
+cuda_arch_flags  | CUDA target arch flag(s) for tests
+static_libstdcpp | Compile the libstdc++ statically
+cpp20_tests      | Enable C++20 tests
 ```
 
 Finally one can choose what to do and compile NSIMD and its tests.
 
@@ -327,17 +361,6 @@ $ ninja
 $ ninja tests
 ```
 
-Note that MPFR () is needed to compile the tests. If you
-do not have the MPFR header installed on your system or if you want to use a
-custom version of MPFR you can tell nsconfig where to find it.
-
-```bash
-$ ../nstools/bin/nsconfig .. -Dsimd=avx2 \
-  -Dmpfr="-Iwhere/is/mpfr/include -Lwhere/is/mpfr/lib -lmpfr"
-$ ninja
-$ ninja tests
-```
-
 Nsconfig comes with nstest, a small tool to execute tests.
 
 ```bash
@@ -401,11 +424,19 @@ but from a library point of view.
 
 NSIMD was designed following as closely as possible the following guidelines:
 
-- Correctness primes over speed.
+- Correctness takes precedence over speed except for corner cases, which may
+  include the following:
+  + Buggy intrinsics on rare input values (denormal numbers, infinities,
+    NaNs), in which case a slower but correct alternative may be
+    proposed to bypass the buggy intrinsics.
+  + A buggy intrinsic on a specific version of a family of chips. It
+    would be unreasonable to penalize the majority of users for a few (or
+    even no) users.
 - Emulate with tricks and intrinsic integer arithmetic when not available.
 - Use common names as found in common computation libraries.
 - Do not hide SIMD registers, one variable (of a type such as `nsimd::pack`)
-  matches one register.
+  matches one register. When possible, force the user to think of SIMD code
+  differently from scalar code.
 - Make the life of the compiler as easy as possible: keep the code simple to
   allow the compiler to perform as many optimizations as possible.
 - Favor the advanced C++ API.
@@ -448,9 +479,15 @@ found in `egg`.
 
 Please see  for more details.
 
-# LICENSE
+# LICENSES
+
+NSIMD contains files from the excellent [Sleef library](https://sleef.org/)
+whose license is stated below. The corresponding files are all located
+in the `src` folder and have retained their original license notices.
 
-Copyright (c) 2020 Agenium Scale
+## NSIMD license
+
+Copyright (c) 2021 Agenium Scale
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
@@ -469,3 +506,30 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
+
+## Sleef license ([Boost Software License v1.0](https://www.boost.org/LICENSE_1_0.txt))
+
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+
diff --git a/build.nsconfig b/build.nsconfig
index 1bc8e2d0..256b8c18 100644
--- a/build.nsconfig
+++ b/build.nsconfig
@@ -1,6 +1,6 @@
 # MIT License
 #
-# Copyright (c) 2020 Agenium Scale
+# Copyright (c) 2021 Agenium Scale
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -20,62 +20,82 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-package_name nsimd-2.2
+package_name nsimd-3.0
 
+## ----------------------------------------------------------------------------
 ## Get OS/Compiler specific file extensions
 
-set o    = @obj_ext
-set exe  = @exe_ext
-set s    = @asm_ext
-set so   = @shared_lib_ext
-set lib  = @shared_link_ext
-set root = @source_dir
-set make = @make_command
-set build = @build_dir
-set root = @source_dir
-
+set o       = @obj_ext
+set exe     = @exe_ext
+set s       = @asm_ext
+set so      = @shared_lib_ext
+set lib     = @shared_link_ext
+set root    = @source_dir
+set make    = @make_command
+set build   = @build_dir
+set ccomp   = @ccomp_name
+set cppcomp = @cppcomp_name
+
+## ----------------------------------------------------------------------------
## Some defaults
 
 ifnot_set "SIMD extension to use" simd = cpu
 ifnot_set "CUDA target arch flag(s) for tests" cuda_arch_flags = ""
-[L] ifnot_set "MPFR compilation flags (for tests only)" mpfr = -lmpfr
-[W] ifnot_set "MPFR compilation flags (for tests only)" mpfr = ""
-ifnot_set "Sleef compilation flags (for benchmarks only)" sleef = -lsleef
-ifnot_set "Google benchmark compilation flags (for benchmarks only)" \
-          benchmark = -lbenchmark
-ifnot_set "Turn off tests/bench/ulps" build_library_only = false
-ifnot_set "Compile the libstdc++ statically" static_libstdcpp = false
+ifnot_set "Compile the libstdc++ statically" static_libstdcpp = true
+ifnot_set "Enable C++20 tests" cpp20_tests = ""
 
+## ----------------------------------------------------------------------------
## Targets for compilation
 
-set o_for_               = fp16$o memory$o ulps$o api_cpu$o
+set o_for_               = fp16$o memory$o ufp$o api_cpu$o rempitab$o \
+                           sleefsp$o sleefdp$o gpu$o
 set o_for_cpu            = $o_for_
 set o_for_cuda           = $o_for_
 set o_for_rocm           = $o_for_
 set o_for_oneapi         = $o_for_
-set o_for_sse2           = $o_for_cpu api_sse2$o
-set o_for_sse42          = $o_for_sse2 api_sse42$o
-set o_for_avx            = $o_for_sse42 api_avx$o
-set o_for_avx2           = $o_for_avx api_avx2$o
-set o_for_avx512_knl     = $o_for_avx2 api_avx512_knl$o
-set o_for_avx512_skylake = $o_for_avx2 api_avx512_skylake$o
-set o_for_neon128        = $o_for_cpu api_neon128$o
-set o_for_aarch64        = $o_for_cpu api_aarch64$o
-set o_for_sve            = $o_for_aarch64 api_sve$o
-set o_for_sve128         = $o_for_aarch64 api_sve128$o
-set o_for_sve256         = $o_for_aarch64 api_sve256$o
-set o_for_sve512         = $o_for_aarch64 api_sve512$o
-set o_for_sve1024        = $o_for_aarch64 api_sve1024$o
-set o_for_sve2048        = $o_for_aarch64 api_sve2048$o
-
+set o_for_sse2           = $o_for_cpu api_sse2$o sleef_sse2_f32$o \
+                           sleef_sse2_f64$o
+set o_for_sse42          = $o_for_sse2 api_sse42$o sleef_sse42_f32$o \
+                           sleef_sse42_f64$o
+set o_for_avx            = $o_for_sse42 api_avx$o sleef_avx_f32$o \
+                           sleef_avx_f64$o
+set o_for_avx2           = $o_for_avx api_avx2$o sleef_avx2_f32$o \
+                           sleef_avx2_f64$o
+set o_for_avx512_knl     = $o_for_avx2 api_avx512_knl$o \
+                           sleef_avx512_knl_f32$o sleef_avx512_knl_f64$o
+set o_for_avx512_skylake = $o_for_avx2 api_avx512_skylake$o \
+                           sleef_avx512_skylake_f32$o \
+                           sleef_avx512_skylake_f64$o
+set o_for_neon128        = $o_for_cpu api_neon128$o sleef_neon128_f32$o \
+                           sleef_neon128_f64$o
+set o_for_aarch64        = $o_for_cpu api_aarch64$o sleef_aarch64_f32$o \
+                           sleef_aarch64_f64$o
+set o_for_sve            = $o_for_aarch64 api_sve$o sleef_sve_f32$o \
+                           sleef_sve_f64$o
+set o_for_sve128         = $o_for_aarch64 api_sve128$o sleef_sve128_f32$o \
+                           sleef_sve128_f64$o
+set o_for_sve256         = $o_for_aarch64 api_sve256$o sleef_sve256_f32$o \
+                           sleef_sve256_f64$o
+set o_for_sve512         = $o_for_aarch64 api_sve512$o sleef_sve512_f32$o \
+                           sleef_sve512_f64$o
+set o_for_sve1024        = $o_for_aarch64 api_sve1024$o sleef_sve1024_f32$o \
+                           sleef_sve1024_f64$o
+set o_for_sve2048        = $o_for_aarch64 api_sve2048$o sleef_sve2048_f32$o \
+                           sleef_sve2048_f64$o
+set o_for_vmx            = $o_for_cpu api_vmx$o sleef_vmx_f32$o sleef_vmx_f64$o
+set o_for_vsx            = $o_for_vmx api_vsx$o sleef_vsx_f32$o sleef_vsx_f64$o
+
+## ----------------------------------------------------------------------------
 ## SIMD compiler flags
 
-lambda cflags_for_generic_* = -DCPU
-set cflags_for_generic_cuda = -DCUDA
-set cflags_for_generic_rocm = -DROCM
+lambda cflags_for_generic_*   = -DCPU
+set cflags_for_generic_cuda   = -DCUDA
+set cflags_for_generic_rocm   = -DROCM
+set cflags_for_generic_oneapi = -DONEAPI
 
-set cflags_for_        = -DCPU
-set cflags_for_cpu     = ${cflags_for_generic_$simd$}
+set cflags_for_        = ${cflags_for_generic_$simd$}
+set cflags_for_cpu     = $cflags_for_
 set cflags_for_cuda    = -DCUDA
 set cflags_for_rocm    = -DROCM
 set cflags_for_oneapi  = -DONEAPI
@@ -94,95 +114,240 @@ set cflags_for_sve256  = -DSVE256 -msve256
 set cflags_for_sve512  = -DSVE512 -msve512
 set cflags_for_sve1024 = -DSVE1024 -msve1024
 set cflags_for_sve2048 = -DSVE2048 -msve2048
+set cflags_for_vmx     = -DVMX -mvmx
+set cflags_for_vsx     = -DVSX -mvsx
 
+## ----------------------------------------------------------------------------
 ## std default flag
 
 lambda std_flag_for_*   = -std=c++98
 set std_flag_for_rocm   = -std=c++11
 set std_flag_for_oneapi = -std=c++17
 
+## ----------------------------------------------------------------------------
 ## libstdc++ linking mode
 
 set libstdcpp_static_link_true  = -static-libstdc++
 set libstdcpp_static_link_false =
 
+## ----------------------------------------------------------------------------
 ## Some defaults
 
-set flags  = -Wall -fPIC -O2 -I$root$/include -DNDEBUG
-set cflags = ${std_flag_for_$simd$} $flags \
-             ${libstdcpp_static_link_$static_libstdcpp$}
+set flags        = -Wall -fPIC -O2 -I$root$/include -DNDEBUG
+set cflags       = ${std_flag_for_$simd$} $flags \
+                   ${libstdcpp_static_link_$static_libstdcpp$}
+set sleef_cflags = -fPIC -O2 -I$root$/src -DNDEBUG -DDORENAME=1
 
+## ----------------------------------------------------------------------------
 ## Default building rules
 
 phony all deps libnsimd_$simd$$so$
 
 build_file libnsimd_$simd$$so deps ${o_for_$simd$}
-	c++ -shared @in -o @out
+	c++ -fPIC -shared @in -o @out
 
 set ldflags = -fPIC -L. -lnsimd_$simd
 
+## ----------------------------------------------------------------------------
 ## Generic (emulation) rules for building
 
+build_file gpu$o autodeps $root$/src/gpu.cpp
+	c++ $cflags$ $cflags_for_cpu @in -c -o @out
+
+build_file ufp$o autodeps $root$/src/ufp.cpp
+	c++ $cflags$ $cflags_for_cpu @in -c -o @out
+
 build_file fp16$o autodeps $root$/src/fp16.cpp
 	c++ $cflags$ $cflags_for_cpu @in -c -o @out
 
 build_file memory$o autodeps $root$/src/memory.cpp
 	c++ $cflags$ $cflags_for_cpu @in -c -o @out
 
-build_file ulps$o autodeps $root$/src/ulps.cpp
-	c++ $cflags$ $cflags_for_cpu @in -c -o @out
+build_file rempitab$o autodeps $root$/src/rempitab.c
+	cc $sleef_cflags$ -c @in -o @out
+
+build_file sleefsp$o autodeps $root$/src/sleefsp.c
+	cc $sleef_cflags$ -c @in -o @out
+
+build_file sleefdp$o autodeps $root$/src/sleefdp.c
+	cc $sleef_cflags$ -c @in -o @out
 
 build_file api_cpu$o autodeps $root$/src/api_cpu.cpp
 	c++ $cflags$ $cflags_for_cpu -c @in -o @out
 
+## ----------------------------------------------------------------------------
 ## Intel rules for building
 
 build_file api_sse2$o autodeps $root$/src/api_sse2.cpp
 	c++ $cflags$ -c $cflags_for_sse2 @in -o @out
 
+build_file sleef_sse2_f32$o autodeps $root$/src/sleefsimdsp.c
+	cc $sleef_cflags$ -c -msse2 -DNSIMD_SSE2 -DENABLE_SSE2=1 @in -o @out
+
+build_file sleef_sse2_f64$o autodeps $root$/src/sleefsimddp.c
+	cc $sleef_cflags$ -c -msse2 -DNSIMD_SSE2 -DENABLE_SSE2=1 @in -o @out
+
 build_file api_sse42$o autodeps $root$/src/api_sse42.cpp
 	c++ $cflags$ -c $cflags_for_sse42 @in -o @out
 
+build_file sleef_sse42_f32$o autodeps $root$/src/sleefsimdsp.c
+	cc $sleef_cflags$ -c -msse42 -DNSIMD_SSE42 -DENABLE_SSE4=1 @in -o @out
+
+build_file sleef_sse42_f64$o autodeps $root$/src/sleefsimddp.c
+	cc $sleef_cflags$ -c -msse42 -DNSIMD_SSE42 -DENABLE_SSE4=1 @in -o @out
+
 build_file api_avx$o autodeps $root$/src/api_avx.cpp
 	c++ $cflags$ -c $cflags_for_avx @in -o @out
 
+build_file sleef_avx_f32$o autodeps $root$/src/sleefsimdsp.c
+	cc $sleef_cflags$ -c -mavx -DNSIMD_AVX -DENABLE_AVX=1 @in -o @out
+
+build_file sleef_avx_f64$o autodeps $root$/src/sleefsimddp.c
+	cc $sleef_cflags$ -c -mavx -DNSIMD_AVX -DENABLE_AVX=1 @in -o @out
+
 build_file api_avx2$o autodeps $root$/src/api_avx2.cpp
 	c++ $cflags$ -c $cflags_for_avx2 @in -o @out
 
+build_file sleef_avx2_f32$o autodeps $root$/src/sleefsimdsp.c
+	cc $sleef_cflags$ -c -mavx2 -mfma -DNSIMD_AVX2 -DENABLE_AVX2=1 \
+	   @in -o @out
+
+build_file sleef_avx2_f64$o autodeps $root$/src/sleefsimddp.c
+	cc $sleef_cflags$ -c -mavx2 -mfma -DNSIMD_AVX2 -DENABLE_AVX2=1 \
+	   @in -o @out
+
 build_file api_avx512_knl$o autodeps $root$/src/api_avx512_knl.cpp
 	c++ $cflags$ -c $cflags_for_avx512_knl @in -o @out
 
+build_file sleef_avx512_knl_f32$o autodeps $root$/src/sleefsimdsp.c
+	cc $sleef_cflags$ -c -mavx512_knl -DNSIMD_AVX512_KNL \
+	   -DENABLE_AVX512F=1 @in -o @out
+
+build_file sleef_avx512_knl_f64$o autodeps $root$/src/sleefsimddp.c
+	cc $sleef_cflags$ -c -mavx512_knl -DNSIMD_AVX512_KNL \
+	   -DENABLE_AVX512F=1 @in -o @out
+
 build_file api_avx512_skylake$o autodeps $root$/src/api_avx512_skylake.cpp
 	c++ $cflags$ -c $cflags_for_avx512_skylake @in -o @out
 
+build_file sleef_avx512_skylake_f32$o autodeps $root$/src/sleefsimdsp.c
+	cc $sleef_cflags$ -c -mavx512_skylake -DNSIMD_AVX512_SKYLAKE \
+	   -DENABLE_AVX512F=1 @in -o @out
+
+build_file sleef_avx512_skylake_f64$o autodeps $root$/src/sleefsimddp.c
+	cc $sleef_cflags$ -c -mavx512_skylake -DNSIMD_AVX512_SKYLAKE \
+	   -DENABLE_AVX512F=1 @in -o @out
+
+## ----------------------------------------------------------------------------
 ## ARM 32 bits rules for building
 
 build_file api_neon128$o autodeps $root$/src/api_neon128.cpp
 	c++ $cflags$ -c $cflags_for_neon128 @in -o @out
 
+build_file sleef_neon128_f32$o autodeps $root$/src/sleefsimdsp.c
+	cc $sleef_cflags$ -c -mneon128 -DNSIMD_NEON128 \
+	   -DENABLE_NEON32=1 @in -o @out
+
+build_file sleef_neon128_f64$o autodeps $root$/src/sleefsimddp_emulation.c
+	cc $sleef_cflags$ -c -mneon128 -DNSIMD_NEON128 -DENABLE_NEON32=1 \
+	   -I$root$/include @in -o @out
+
+## ----------------------------------------------------------------------------
 ## ARM 64 bits rules for building
 
 build_file api_aarch64$o autodeps $root$/src/api_aarch64.cpp
 	c++ $cflags$ -c $cflags_for_aarch64 @in -o @out
 
+build_file sleef_aarch64_f32$o autodeps $root$/src/sleefsimdsp.c
+	cc $sleef_cflags$ -c -maarch64 -DNSIMD_AARCH64 \
+	   -DENABLE_ADVSIMD=1 @in -o @out
+
+build_file sleef_aarch64_f64$o autodeps $root$/src/sleefsimddp.c
+	cc $sleef_cflags$ -c -maarch64 -DNSIMD_AARCH64 \
+	   -DENABLE_ADVSIMD=1 @in -o @out
+
 build_file api_sve$o autodeps $root$/src/api_sve.cpp
 	c++ $cflags$ -c $cflags_for_sve @in -o @out
 
+build_file sleef_sve_f32$o autodeps $root$/src/sleefsimdsp.c
+	cc $sleef_cflags$ -c -msve -DNSIMD_SVE -DENABLE_SVE=1 @in -o @out
+
+build_file sleef_sve_f64$o autodeps $root$/src/sleefsimddp.c
+	cc $sleef_cflags$ -c -msve -DNSIMD_SVE -DENABLE_SVE=1 @in -o @out
+
 build_file api_sve128$o autodeps $root$/src/api_sve128.cpp
 	c++ $cflags$ -c $cflags_for_sve128 @in -o @out
 
+build_file sleef_sve128_f32$o autodeps $root$/src/sleefsimdsp.c
+	cc $sleef_cflags$ -c -msve128 -DNSIMD_SVE128 -DENABLE_SVE=1 @in -o @out
+
+build_file sleef_sve128_f64$o autodeps $root$/src/sleefsimddp.c
+	cc $sleef_cflags$ -c -msve128 -DNSIMD_SVE128 -DENABLE_SVE=1 @in -o @out
+
 build_file api_sve256$o autodeps $root$/src/api_sve256.cpp
 	c++ $cflags$ -c $cflags_for_sve256 @in -o @out
 
+build_file sleef_sve256_f32$o autodeps $root$/src/sleefsimdsp.c
+	cc $sleef_cflags$ -c -msve256 -DNSIMD_SVE256 -DENABLE_SVE=1 @in -o @out
+
+build_file sleef_sve256_f64$o autodeps $root$/src/sleefsimddp.c
+	cc $sleef_cflags$ -c -msve256 -DNSIMD_SVE256 -DENABLE_SVE=1 @in -o @out
+
 build_file api_sve512$o autodeps $root$/src/api_sve512.cpp
 	c++ $cflags$ -c $cflags_for_sve512 @in -o @out
 
+build_file sleef_sve512_f32$o autodeps $root$/src/sleefsimdsp.c
+	cc $sleef_cflags$ -c -msve512 -DNSIMD_SVE512 -DENABLE_SVE=1 @in -o @out
+
+build_file sleef_sve512_f64$o autodeps $root$/src/sleefsimddp.c
+	cc $sleef_cflags$ -c -msve512 -DNSIMD_SVE512 -DENABLE_SVE=1 @in -o @out
+
 build_file api_sve1024$o autodeps $root$/src/api_sve1024.cpp
 	c++ $cflags$ -c $cflags_for_sve1024 @in -o @out
 
+build_file sleef_sve1024_f32$o autodeps $root$/src/sleefsimdsp.c
+	cc $sleef_cflags$ -c -msve1024 -DNSIMD_SVE1024 -DENABLE_SVE=1 \
+	   @in -o @out
+
+build_file sleef_sve1024_f64$o autodeps $root$/src/sleefsimddp.c
+	cc $sleef_cflags$ -c -msve1024 -DNSIMD_SVE1024 -DENABLE_SVE=1 \
+	   @in -o @out
+
 build_file api_sve2048$o autodeps $root$/src/api_sve2048.cpp
 	c++ $cflags$ -c $cflags_for_sve2048 @in -o @out
 
+build_file sleef_sve2048_f32$o autodeps $root$/src/sleefsimdsp.c
+	cc $sleef_cflags$ -c -msve2048 -DNSIMD_SVE2048 -DENABLE_SVE=1 \
+	   @in -o @out
+
+build_file sleef_sve2048_f64$o autodeps $root$/src/sleefsimddp.c
+	cc $sleef_cflags$ -c -msve2048 -DNSIMD_SVE2048 -DENABLE_SVE=1 \
+	   @in -o @out
+
+## ----------------------------------------------------------------------------
+## POWERPC rules for building
+
+build_file api_vmx$o autodeps $root$/src/api_vmx.cpp
+	c++ $cflags$ -c $cflags_for_vmx @in -o @out
+
+build_file sleef_vmx_f32$o autodeps $root$/src/sleefsimdsp_emulation.c
+	cc $sleef_cflags$ -c -mvmx -DNSIMD_VMX -DENABLE_VSX=1 \
+	   -I$root$/include @in -o @out
+
+build_file sleef_vmx_f64$o autodeps $root$/src/sleefsimddp_emulation.c
+	cc $sleef_cflags$ -c -mvmx -DNSIMD_VMX -DENABLE_VSX=1 \
+	   -I$root$/include @in -o @out
+
+build_file api_vsx$o autodeps $root$/src/api_vsx.cpp
+	c++ $cflags$ -c $cflags_for_vsx @in -o @out
+
+build_file sleef_vsx_f32$o autodeps $root$/src/sleefsimdsp.c
+	cc $sleef_cflags$ -c -mvsx -DNSIMD_VSX -DENABLE_VSX=1 @in -o @out
+
+build_file sleef_vsx_f64$o autodeps $root$/src/sleefsimddp.c
+	cc $sleef_cflags$ -c -mvsx -DNSIMD_VSX -DENABLE_VSX=1 @in -o @out
+
+## ----------------------------------------------------------------------------
 ## Installation and packaging
 
 install_file libnsimd_${simd}$so lib
@@ -190,68 +355,110 @@ install_file libnsimd_${simd}$so lib
 install_dir $root$/include/nsimd include
 install_dir $root$/doc/html doc
 
-begin_translate_if $build_library_only == false
-
+## ----------------------------------------------------------------------------
 ## Tests
 
-lambda tests_comp_for_*  = c++
-set tests_comp_for_cuda  = nvcc $cuda_arch_flags
-set tests_comp_for_rocm  = hipcc $cuda_arch_flags
-set tests_comp_for_dpcpp = dpcpp
-
-set tests_comp = ${tests_comp_for_$simd$}
-
-set tests_flags = $flags$ $mpfr ${cflags_for_$simd$} -lm $ldflags
-
+# Lambda arguments: suite, compiler, std, simd_ext
+# By default all tests will be considered
+lambda tests_*_*_* = ok
+
+# Now disable some possibilities on certain compilers
+set tests_clang_c89_vmx    = ""
+set tests_clang_c89_vsx    = ""
+set tests_clang_c89_sve    = ""
+lambda tests_*_c89_cuda    = ""
+lambda tests_*_c99_cuda    = ""
+lambda tests_*_c11_cuda    = ""
+lambda tests_*_cpp17_cuda  = ""
+lambda tests_*_c89_rocm    = ""
+lambda tests_*_c99_rocm    = ""
+lambda tests_*_c11_rocm    = ""
+lambda tests_*_cpp98_rocm  = ""
+lambda tests_*_cpp17_rocm  = ""
+lambda tests_*_c89_oneapi  = ""
+lambda tests_*_c99_oneapi  = ""
+lambda tests_*_c11_oneapi  = ""
+lambda tests_dpcpp_cpp98_* = ""
+lambda tests_dpcpp_cpp11_* = ""
+
+set c89_enabled   = ${tests_$ccomp$_c89_$simd$}
+set c89.files     = ""
+set c99_enabled   = ${tests_$ccomp$_c99_$simd$}
+set c99.files     = ""
+set c11_enabled   = ${tests_$ccomp$_c11_$simd$}
+set c11.files     = ""
+set cpp98_enabled = ${tests_$cppcomp$_cpp98_$simd$}
+set cpp98.files   = ""
+set cpp11_enabled = ${tests_$cppcomp$_cpp11_$simd$}
+set cpp11.files   = ""
+set cpp17_enabled = ${tests_$cppcomp$_cpp17_$simd$}
+set cpp17.files   = ""
+set cpp20.files   = ""
+
+set tests_flags = $cuda_arch_flags $flags ${cflags_for_$simd$} -lm $ldflags
 
 echo Test compilation flags: $tests_flags$
 
-build_files c89 foreach glob:$root$/tests/*.c \
+[$c89_enabled$] build_files c89 foreach glob:$root$/tests/*.prec11.c \
                 as tests.%r.c89$exe \
                 autodeps @item libnsimd_$simd$$so$
-	cc -std=c89 @item $tests_flags -o @out
+	[$c89_enabled$] cc -std=c89 @item $tests_flags -o @out
+
+[$c89_enabled$] phony tests.c89 deps $c89.files
+
 
-build_files c99 foreach glob:$root$/tests/*.c \
+[$c99_enabled$] build_files c99 foreach glob:$root$/tests/*.prec11.c \
                 as tests.%r.c99$exe \
                 autodeps @item libnsimd_$simd$$so$
-	cc -std=c99 @item $tests_flags -o @out
+	[$c99_enabled$] cc -std=c99 @item $tests_flags -o @out
+
+[$c99_enabled$] phony tests.c99 deps $c99.files
+
 
-build_files c11 foreach glob:$root$/tests/*.c \
+[$c11_enabled$] build_files c11 foreach glob:$root$/tests/*.c \
                 as tests.%r.c11$exe \
                autodeps @item libnsimd_$simd$$so$
-	cc -std=c11 @item $tests_flags -o @out
+	[$c11_enabled$] cc -std=c11 @item $tests_flags -o @out
 
-build_files cpp98 foreach glob:$root$/tests/*.cpp \
+[$c11_enabled$] phony tests.c11 deps $c11.files
+
+
+[$cpp98_enabled$] build_files cpp98 foreach glob:$root$/tests/*.cpp \
                 as tests.%r.cpp98$exe \
                 autodeps @item libnsimd_$simd$$so$
-	$tests_comp -std=c++98 @item $tests_flags -o @out
+	[$cpp98_enabled$] c++ -std=c++98 @item $tests_flags -o @out
+
+[$cpp98_enabled$] phony tests.cpp98 deps $cpp98.files
+
 
-build_files cpp11 foreach glob:$root$/tests/*.cpp \
+[$cpp11_enabled$] build_files cpp11 foreach glob:$root$/tests/*.cpp \
                 as tests.%r.cpp11$exe \
                 autodeps @item libnsimd_$simd$$so$
-	$tests_comp -std=c++11 @item $tests_flags -o @out
+	[$cpp11_enabled$] c++ -std=c++11 @item $tests_flags -o @out
 
-build_files cpp20 foreach glob:$root$/tests/*.cpp \
-                as tests.%r.cpp20$exe \
+[$cpp11_enabled$] phony tests.cpp11 deps $cpp11.files
+
+
+[$cpp17_enabled$] build_files cpp17 foreach glob:$root$/tests/*.cpp \
+                as tests.%r.cpp17$exe \
                 autodeps @item libnsimd_$simd$$so$
-	$tests_comp -std=c++20 @item $tests_flags -o @out
+	[$cpp17_enabled$] c++ -std=c++17 @item $tests_flags -o @out
 
-phony tests.c89 deps $c89.files
-phony tests.c99 deps $c99.files
-phony tests.c11 deps $c11.files
-phony tests.cpp98 deps $cpp98.files
-phony tests.cpp11 deps $cpp11.files
-phony tests.cpp20 deps $cpp20.files
+[$cpp17_enabled$] phony tests.cpp17 deps $cpp17.files
 
-# Phony target for tests
-lambda phony_tests_for_* = tests.c89 tests.c99 tests.cpp98 tests.cpp11
-set phony_tests_for_cuda = tests.cpp98 tests.cpp11
-set phony_tests_for_rocm = tests.cpp11
-set phony_tests_for_dpcpp = tests.cpp11
-set phony_tests_for_sve = tests.c11 tests.cpp11
+[$cpp20_tests$] build_files cpp20 foreach glob:$root$/tests/*.cpp \
+                as tests.%r.cpp20$exe \
+                autodeps @item libnsimd_$simd$$so$
+	[$cpp20_tests$] c++ -std=c++20 @item $tests_flags -o @out
+
+[$cpp20_tests$] phony tests.cpp20 deps $cpp20.files
 
-phony tests deps ${phony_tests_for_$simd$}
+# Phony target for tests
+phony tests deps $c89.files $c99.files $c11.files $cpp98.files $cpp11.files \
+                 $cpp17.files $cpp20.files
 
+## ----------------------------------------------------------------------------
 ## Examples
 
 build_files examples_cpp98 foreach glob:$root$/examples/*.cpp \
@@ -260,29 +467,3 @@ build_files examples_cpp98 foreach glob:$root$/examples/*.cpp \
 	c++ -std=c++98 @item $tests_flags -o @out
 
 phony examples.cpp98 deps $examples_cpp98.files
-
-## Ulps
-
-build_files optional ulps foreach glob:$root$/ulps/*.cpp \
-                as ulp.%b.${simd}$exe \
-                autodeps @item libnsimd_$simd$$so$
-	c++ $flags$ $mpfr -std=c++11 @item ${cflags_for_$simd$} \
-	    $ldflags -o @out
-
-phony ulps deps $ulps.files
-
-## Benches
-
-set benches_flags = -std=c++11 ${cflags_for_$simd$} $flags $sleef $benchmark
-
-echo Benches compilation flags: $benches_flags
-
-build_files optional benches \
-                foreach glob:$root$/benches/*.$simd$.*.cpp \
-                as benches.%b.cpp11$exe \
-                autodeps @item libnsimd_$simd$$so$
-	c++ benches_flags @item -o @out
-
-phony benches deps $benches.files$
-
-end_translate
diff --git a/doc/html/assets/tex-mml-chtml.js b/doc/html/assets/tex-mml-chtml.js
deleted file mode 100644
index 4e2addf2..00000000
--- a/doc/html/assets/tex-mml-chtml.js
+++ /dev/null
@@ -1 +0,0 @@
-[one minified line of the bundled MathJax tex-mml-chtml.js, omitted]
defined.")},c=this&&this.__spread||function(){for(var t=[],e=0;e=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var b,n,i,o=r(33),a=r(8),L=r(3),s=r(9),g=r(8),M=r(25),c=(l.create=function(t,e){return void 0===e&&(e={}),new l(t,e.handler||{},e.fallback||{},e.items||{},e.tags||{},e.options||{},e.nodes||{},e.preprocessors||[],e.postprocessors||[],[e.init,e.priority],[e.config,e.configPriority])},l.empty=function(){return l.create("empty")},l.extension=function(){return new s.MacroMap(a.ExtensionMaps.NEW_MACRO,{},{}),new s.DelimiterMap(a.ExtensionMaps.NEW_DELIMITER,o.default.delimiter,{}),new s.CommandMap(a.ExtensionMaps.NEW_COMMAND,{},{}),new s.EnvironmentMap(a.ExtensionMaps.NEW_ENVIRONMENT,o.default.environment,{},{}),l.create("extension",{handler:{character:[],delimiter:[a.ExtensionMaps.NEW_DELIMITER],macro:[a.ExtensionMaps.NEW_DELIMITER,a.ExtensionMaps.NEW_COMMAND,a.ExtensionMaps.NEW_MACRO],environment:[a.ExtensionMaps.NEW_ENVIRONMENT]}})},l.prototype.init=function(t){this.initMethod.execute(t)},l.prototype.config=function(t,e){var r,n,i,o;this.configMethod.execute(t,e);try{for(var a=I(this.preprocessors),s=a.next();!s.done;s=a.next()){var c=s.value;"function"==typeof c?e.preFilters.add(c):e.preFilters.add(c[0],c[1])}}catch(t){r={error:t}}finally{try{s&&!s.done&&(n=a.return)&&n.call(a)}finally{if(r)throw r.error}}try{for(var l=I(this.postprocessors),u=l.next();!u.done;u=l.next()){var h=u.value;"function"==typeof h?e.postFilters.add(h):e.postFilters.add(h[0],h[1])}}catch(t){i={error:t}}finally{try{u&&!u.done&&(o=l.return)&&o.call(l)}finally{if(i)throw i.error}}},l.prototype.append=function(t){var e,r,n,i,o,a,s,c,l,u,h,f,p=Object.keys(t.handler);try{for(var d=I(p),m=d.next();!m.done;m=d.next()){var y=m.value;try{for(var v=(n=void 0,I(t.handler[y])),b=v.next();!b.done;b=v.next()){var g=b.value;this.handler[y].unshift(g)}}catch(t){n={error:t}}finally{try{b&&!b.done&&(i=v.return)&&i.call(v)}finally{if(n)throw n.error}}}}catch(t){e={error:t}}finally{try{m&&!m.done&&(r=d.return)&&r.call(d)}finally{if(e)throw e.error}}Object.assign(this.fallback,t.fallback),Object.assign(this.items,t.items),Object.assign(this.tags,t.tags),L.defaultOptions(this.options,t.options),Object.assign(this.nodes,t.nodes);try{for(var M=I(t.preprocessors),O=M.next();!O.done;O=M.next()){var x=O.value;this.preprocessors.push(x)}}catch(t){o={error:t}}finally{try{O&&!O.done&&(a=M.return)&&a.call(M)}finally{if(o)throw o.error}}try{for(var S=I(t.postprocessors),E=S.next();!E.done;E=S.next()){var C=E.value;this.postprocessors.push(C)}}catch(t){s={error:t}}finally{try{E&&!E.done&&(c=S.return)&&c.call(S)}finally{if(s)throw s.error}}try{for(var _=I(t.initMethod),T=_.next();!T.done;T=_.next()){var w=T.value;this.initMethod.add(w.item,w.priority)}}catch(t){l={error:t}}finally{try{T&&!T.done&&(u=_.return)&&u.call(_)}finally{if(l)throw l.error}}try{for(var A=I(t.configMethod),k=A.next();!k.done;k=A.next())w=k.value,this.configMethod.add(w.item,w.priority)}catch(t){h={error:t}}finally{try{k&&!k.done&&(f=A.return)&&f.call(A)}finally{if(h)throw h.error}}},l.prototype.register=function(t,e,r){var n,i,o,a,s,c;void 0===r&&(r={}),this.append(t),t.init(this);var l=e.parseOptions;l.handlers=new g.SubHandlers(this),l.nodeFactory.setCreators(t.nodes);try{for(var u=I(Object.keys(t.items)),h=u.next();!h.done;h=u.next()){var 
f=h.value;l.itemFactory.setNodeClass(f,t.items[f])}}catch(t){n={error:t}}finally{try{h&&!h.done&&(i=u.return)&&i.call(u)}finally{if(n)throw n.error}}L.defaultOptions(l.options,t.options),L.userOptions(l.options,r),t.config(this,e);try{for(var p=I(t.preprocessors),d=p.next();!d.done;d=p.next()){var m=d.value;Array.isArray(m)?e.preFilters.add(m[0],m[1]):e.preFilters.add(m)}}catch(t){o={error:t}}finally{try{d&&!d.done&&(a=p.return)&&a.call(p)}finally{if(o)throw o.error}}try{for(var y=I(t.postprocessors),v=y.next();!v.done;v=y.next()){var b=v.value;Array.isArray(b)?e.postFilters.add(b[0],b[1]):e.postFilters.add(b)}}catch(t){s={error:t}}finally{try{v&&!v.done&&(c=y.return)&&c.call(y)}finally{if(s)throw s.error}}},l);function l(t,e,r,n,i,o,a,s,c,l,u){void 0===e&&(e={}),void 0===r&&(r={}),void 0===n&&(n={}),void 0===i&&(i={}),void 0===o&&(o={}),void 0===a&&(a={}),void 0===s&&(s=[]),void 0===c&&(c=[]);var h=v(l,2),f=h[0],p=h[1],d=v(u,2),m=d[0],y=d[1];this.name=t,this.handler=e,this.fallback=r,this.items=n,this.tags=i,this.options=o,this.nodes=a,this.preprocessors=s,this.postprocessors=c,this.initMethod=new M.FunctionList,this.configMethod=new M.FunctionList,f&&this.initMethod.add(f,p||0),m&&this.configMethod.add(m,y||p||0),this.handler=Object.assign({character:[],delimiter:[],macro:[],environment:[]},e),b.set(t,this)}e.Configuration=c,n=b=e.ConfigurationHandler||(e.ConfigurationHandler={}),i=new Map,n.set=function(t,e){i.set(t,e)},n.get=function(t){return i.get(t)},n.keys=function(){return i.keys()}},function(t,n,e){"use strict";Object.defineProperty(n,"__esModule",{value:!0});var i=e(69),o=e(103);n.options={loadMissingEntities:!0},n.entities={ApplyFunction:"\u2061",Backslash:"\u2216",Because:"\u2235",Breve:"\u02d8",Cap:"\u22d2",CenterDot:"\xb7",CircleDot:"\u2299",CircleMinus:"\u2296",CirclePlus:"\u2295",CircleTimes:"\u2297",Congruent:"\u2261",ContourIntegral:"\u222e",Coproduct:"\u2210",Cross:"\u2a2f",Cup:"\u22d3",CupCap:"\u224d",Dagger:"\u2021",Del:"\u2207",Delta:"\u0394",Diamond:"\u22c4",DifferentialD:"\u2146",DotEqual:"\u2250",DoubleDot:"\xa8",DoubleRightTee:"\u22a8",DoubleVerticalBar:"\u2225",DownArrow:"\u2193",DownLeftVector:"\u21bd",DownRightVector:"\u21c1",DownTee:"\u22a4",Downarrow:"\u21d3",Element:"\u2208",EqualTilde:"\u2242",Equilibrium:"\u21cc",Exists:"\u2203",ExponentialE:"\u2147",FilledVerySmallSquare:"\u25aa",ForAll:"\u2200",Gamma:"\u0393",Gg:"\u22d9",GreaterEqual:"\u2265",GreaterEqualLess:"\u22db",GreaterFullEqual:"\u2267",GreaterLess:"\u2277",GreaterSlantEqual:"\u2a7e",GreaterTilde:"\u2273",Hacek:"\u02c7",Hat:"^",HumpDownHump:"\u224e",HumpEqual:"\u224f",Im:"\u2111",ImaginaryI:"\u2148",Integral:"\u222b",Intersection:"\u22c2",InvisibleComma:"\u2063",InvisibleTimes:"\u2062",Lambda:"\u039b",Larr:"\u219e",LeftAngleBracket:"\u27e8",LeftArrow:"\u2190",LeftArrowRightArrow:"\u21c6",LeftCeiling:"\u2308",LeftDownVector:"\u21c3",LeftFloor:"\u230a",LeftRightArrow:"\u2194",LeftTee:"\u22a3",LeftTriangle:"\u22b2",LeftTriangleEqual:"\u22b4",LeftUpVector:"\u21bf",LeftVector:"\u21bc",Leftarrow:"\u21d0",Leftrightarrow:"\u21d4",LessEqualGreater:"\u22da",LessFullEqual:"\u2266",LessGreater:"\u2276",LessSlantEqual:"\u2a7d",LessTilde:"\u2272",Ll:"\u22d8",Lleftarrow:"\u21da",LongLeftArrow:"\u27f5",LongLeftRightArrow:"\u27f7",LongRightArrow:"\u27f6",Longleftarrow:"\u27f8",Longleftrightarrow:"\u27fa",Longrightarrow:"\u27f9",Lsh:"\u21b0",MinusPlus:"\u2213",NestedGreaterGreater:"\u226b",NestedLessLess:"\u226a",NotDoubleVerticalBar:"\u2226",NotElement:"\u2209",NotEqual:"\u2260",NotExists:"\u2204",NotGreater:"\u22
6f",NotGreaterEqual:"\u2271",NotLeftTriangle:"\u22ea",NotLeftTriangleEqual:"\u22ec",NotLess:"\u226e",NotLessEqual:"\u2270",NotPrecedes:"\u2280",NotPrecedesSlantEqual:"\u22e0",NotRightTriangle:"\u22eb",NotRightTriangleEqual:"\u22ed",NotSubsetEqual:"\u2288",NotSucceeds:"\u2281",NotSucceedsSlantEqual:"\u22e1",NotSupersetEqual:"\u2289",NotTilde:"\u2241",NotVerticalBar:"\u2224",Omega:"\u03a9",OverBar:"\u203e",OverBrace:"\u23de",PartialD:"\u2202",Phi:"\u03a6",Pi:"\u03a0",PlusMinus:"\xb1",Precedes:"\u227a",PrecedesEqual:"\u2aaf",PrecedesSlantEqual:"\u227c",PrecedesTilde:"\u227e",Product:"\u220f",Proportional:"\u221d",Psi:"\u03a8",Rarr:"\u21a0",Re:"\u211c",ReverseEquilibrium:"\u21cb",RightAngleBracket:"\u27e9",RightArrow:"\u2192",RightArrowLeftArrow:"\u21c4",RightCeiling:"\u2309",RightDownVector:"\u21c2",RightFloor:"\u230b",RightTee:"\u22a2",RightTeeArrow:"\u21a6",RightTriangle:"\u22b3",RightTriangleEqual:"\u22b5",RightUpVector:"\u21be",RightVector:"\u21c0",Rightarrow:"\u21d2",Rrightarrow:"\u21db",Rsh:"\u21b1",Sigma:"\u03a3",SmallCircle:"\u2218",Sqrt:"\u221a",Square:"\u25a1",SquareIntersection:"\u2293",SquareSubset:"\u228f",SquareSubsetEqual:"\u2291",SquareSuperset:"\u2290",SquareSupersetEqual:"\u2292",SquareUnion:"\u2294",Star:"\u22c6",Subset:"\u22d0",SubsetEqual:"\u2286",Succeeds:"\u227b",SucceedsEqual:"\u2ab0",SucceedsSlantEqual:"\u227d",SucceedsTilde:"\u227f",SuchThat:"\u220b",Sum:"\u2211",Superset:"\u2283",SupersetEqual:"\u2287",Supset:"\u22d1",Therefore:"\u2234",Theta:"\u0398",Tilde:"\u223c",TildeEqual:"\u2243",TildeFullEqual:"\u2245",TildeTilde:"\u2248",UnderBar:"_",UnderBrace:"\u23df",Union:"\u22c3",UnionPlus:"\u228e",UpArrow:"\u2191",UpDownArrow:"\u2195",UpTee:"\u22a5",Uparrow:"\u21d1",Updownarrow:"\u21d5",Upsilon:"\u03a5",Vdash:"\u22a9",Vee:"\u22c1",VerticalBar:"\u2223",VerticalTilde:"\u2240",Vvdash:"\u22aa",Wedge:"\u22c0",Xi:"\u039e",amp:"&",acute:"\xb4",aleph:"\u2135",alpha:"\u03b1",amalg:"\u2a3f",and:"\u2227",ang:"\u2220",angmsd:"\u2221",angsph:"\u2222",ape:"\u224a",backprime:"\u2035",backsim:"\u223d",backsimeq:"\u22cd",beta:"\u03b2",beth:"\u2136",between:"\u226c",bigcirc:"\u25ef",bigodot:"\u2a00",bigoplus:"\u2a01",bigotimes:"\u2a02",bigsqcup:"\u2a06",bigstar:"\u2605",bigtriangledown:"\u25bd",bigtriangleup:"\u25b3",biguplus:"\u2a04",blacklozenge:"\u29eb",blacktriangle:"\u25b4",blacktriangledown:"\u25be",blacktriangleleft:"\u25c2",bowtie:"\u22c8",boxdl:"\u2510",boxdr:"\u250c",boxminus:"\u229f",boxplus:"\u229e",boxtimes:"\u22a0",boxul:"\u2518",boxur:"\u2514",bsol:"\\",bull:"\u2022",cap:"\u2229",check:"\u2713",chi:"\u03c7",circ:"\u02c6",circeq:"\u2257",circlearrowleft:"\u21ba",circlearrowright:"\u21bb",circledR:"\xae",circledS:"\u24c8",circledast:"\u229b",circledcirc:"\u229a",circleddash:"\u229d",clubs:"\u2663",colon:":",comp:"\u2201",ctdot:"\u22ef",cuepr:"\u22de",cuesc:"\u22df",cularr:"\u21b6",cup:"\u222a",curarr:"\u21b7",curlyvee:"\u22ce",curlywedge:"\u22cf",dagger:"\u2020",daleth:"\u2138",ddarr:"\u21ca",deg:"\xb0",delta:"\u03b4",digamma:"\u03dd",div:"\xf7",divideontimes:"\u22c7",dot:"\u02d9",doteqdot:"\u2251",dotplus:"\u2214",dotsquare:"\u22a1",dtdot:"\u22f1",ecir:"\u2256",efDot:"\u2252",egs:"\u2a96",ell:"\u2113",els:"\u2a95",empty:"\u2205",epsi:"\u03b5",epsiv:"\u03f5",erDot:"\u2253",eta:"\u03b7",eth:"\xf0",flat:"\u266d",fork:"\u22d4",frown:"\u2322",gEl:"\u2a8c",gamma:"\u03b3",gap:"\u2a86",gimel:"\u2137",gnE:"\u2269",gnap:"\u2a8a",gne:"\u2a88",gnsim:"\u22e7",gt:">",gtdot:"\u22d7",harrw:"\u21ad",hbar:"\u210f",hellip:"\u2026",hookleftarrow:"\u21a9",hookrightarrow:"\u21aa",imath:"\u0131",
infin:"\u221e",intcal:"\u22ba",iota:"\u03b9",jmath:"\u0237",kappa:"\u03ba",kappav:"\u03f0",lEg:"\u2a8b",lambda:"\u03bb",lap:"\u2a85",larrlp:"\u21ab",larrtl:"\u21a2",lbrace:"{",lbrack:"[",le:"\u2264",leftleftarrows:"\u21c7",leftthreetimes:"\u22cb",lessdot:"\u22d6",lmoust:"\u23b0",lnE:"\u2268",lnap:"\u2a89",lne:"\u2a87",lnsim:"\u22e6",longmapsto:"\u27fc",looparrowright:"\u21ac",lowast:"\u2217",loz:"\u25ca",lt:"<",ltimes:"\u22c9",ltri:"\u25c3",macr:"\xaf",malt:"\u2720",mho:"\u2127",mu:"\u03bc",multimap:"\u22b8",nLeftarrow:"\u21cd",nLeftrightarrow:"\u21ce",nRightarrow:"\u21cf",nVDash:"\u22af",nVdash:"\u22ae",natur:"\u266e",nearr:"\u2197",nharr:"\u21ae",nlarr:"\u219a",not:"\xac",nrarr:"\u219b",nu:"\u03bd",nvDash:"\u22ad",nvdash:"\u22ac",nwarr:"\u2196",omega:"\u03c9",omicron:"\u03bf",or:"\u2228",osol:"\u2298",period:".",phi:"\u03c6",phiv:"\u03d5",pi:"\u03c0",piv:"\u03d6",prap:"\u2ab7",precnapprox:"\u2ab9",precneqq:"\u2ab5",precnsim:"\u22e8",prime:"\u2032",psi:"\u03c8",quot:'"',rarrtl:"\u21a3",rbrace:"}",rbrack:"]",rho:"\u03c1",rhov:"\u03f1",rightrightarrows:"\u21c9",rightthreetimes:"\u22cc",ring:"\u02da",rmoust:"\u23b1",rtimes:"\u22ca",rtri:"\u25b9",scap:"\u2ab8",scnE:"\u2ab6",scnap:"\u2aba",scnsim:"\u22e9",sdot:"\u22c5",searr:"\u2198",sect:"\xa7",sharp:"\u266f",sigma:"\u03c3",sigmav:"\u03c2",simne:"\u2246",smile:"\u2323",spades:"\u2660",sub:"\u2282",subE:"\u2ac5",subnE:"\u2acb",subne:"\u228a",supE:"\u2ac6",supnE:"\u2acc",supne:"\u228b",swarr:"\u2199",tau:"\u03c4",theta:"\u03b8",thetav:"\u03d1",tilde:"\u02dc",times:"\xd7",triangle:"\u25b5",triangleq:"\u225c",upsi:"\u03c5",upuparrows:"\u21c8",veebar:"\u22bb",vellip:"\u22ee",weierp:"\u2118",xi:"\u03be",yen:"\xa5",zeta:"\u03b6",zigrarr:"\u21dd"};var a={};function r(t,e){if("#"===e.charAt(0))return s(e.slice(1));if(n.entities[e])return n.entities[e];if(n.options.loadMissingEntities){var r=e.match(/^[a-zA-Z](fr|scr|opf)$/)?RegExp.$1:e.charAt(0).toLowerCase();a[r]||(a[r]=!0,i.retryAfter(o.asyncLoad("./util/entities/"+r+".js")))}return t}function s(t){var e="x"===t.charAt(0)?parseInt(t.slice(1),16):parseInt(t);if(e<65536)return String.fromCharCode(e);var r=55296+((e-=65536)>>10),n=56320+(1023&e);return String.fromCharCode(r,n)}n.add=function(t,e){Object.assign(n.entities,t),a[e]=!0},n.remove=function(t){delete n.entities[t]},n.translate=function(t){return t.replace(/&([a-z][a-z0-9]*|#(?:[0-9]+|x[0-9a-f]+));/gi,r)},n.numeric=s},function(t,o,e){"use strict";Object.defineProperty(o,"__esModule",{value:!0}),o.protoItem=function(t,e,r,n,i,o,a){return void 0===a&&(a=null),{open:t,math:e,close:r,n:n,start:{n:i},end:{n:o},display:a}};var r=(n.prototype.render=function(t){t.renderActions.renderMath(this,t)},n.prototype.rerender=function(t,e){void 0===e&&(e=o.STATE.RERENDER),this.state()>=e&&this.state(e-1),t.renderActions.renderMath(this,t,e)},n.prototype.convert=function(t,e){void 0===e&&(e=o.STATE.LAST),t.renderActions.renderConvert(this,t,e)},n.prototype.compile=function(t){this.state()=o.STATE.INSERTED&&this.removeFromDocument(e),t=o.STATE.TYPESET&&(this.bbox={},this.outputData={}),t=o.STATE.COMPILED&&(this.inputData={}),this._state=t),this._state},n.prototype.reset=function(t){void 0===t&&(t=!1),this.state(o.STATE.UNPROCESSED)},n);function n(t,e,r,n,i){void 0===r&&(r=!0),void 0===n&&(n={i:0,n:0,delim:""}),void 
0===i&&(i={i:0,n:0,delim:""}),this.root=null,this.typesetRoot=null,this._state=o.STATE.UNPROCESSED,this.metrics={},this.bbox={},this.inputData={},this.outputData={},this.math=t,this.inputJax=e,this.display=r,this.start=n,this.end=i,this.root=null,this.typesetRoot=null,this.metrics={},this.bbox={},this.inputData={},this.outputData={}}o.AbstractMathItem=r,o.STATE={UNPROCESSED:0,FINDMATH:10,COMPILED:20,CONVERT:100,METRICS:110,RERENDER:125,TYPESET:150,INSERTED:200,RESET:500,LAST:1e4},o.newState=function(t,e){if(t in o.STATE)throw Error("State "+t+" already exists");o.STATE[t]=e}},function(t,s,e){"use strict";Object.defineProperty(s,"__esModule",{value:!0}),s.BIGDIMEN=1e6,s.UNITS={px:1,pt:96/72,pc:8,in:96,cm:96/2.54,mm:96/25.4},s.RELUNITS={em:1,ex:.431,mu:1/18},s.MATHSPACE={veryverythinmathspace:1/18,verythinmathspace:2/18,thinmathspace:3/18,mediummathspace:4/18,thickmathspace:5/18,verythickmathspace:6/18,veryverythickmathspace:7/18,negativeveryverythinmathspace:-1/18,negativeverythinmathspace:-2/18,negativethinmathspace:-3/18,negativemediummathspace:-4/18,negativethickmathspace:-5/18,negativeverythickmathspace:-6/18,negativeveryverythickmathspace:-7/18,thin:.04,medium:.06,thick:.1,normal:1,big:2,small:1/Math.sqrt(2),infinity:s.BIGDIMEN},s.length2em=function(t,e,r,n){if(void 0===e&&(e=0),void 0===r&&(r=1),void 0===n&&(n=16),"string"!=typeof t&&(t=String(t)),""===t||null==t)return e;if(s.MATHSPACE[t])return s.MATHSPACE[t];var i=t.match(/^\s*([-+]?(?:\.\d+|\d+(?:\.\d*)?))?(pt|em|ex|mu|px|pc|in|mm|cm|%)?/);if(!i)return e;var o=parseFloat(i[1]||"1"),a=i[2];return s.UNITS.hasOwnProperty(a)?o*s.UNITS[a]/n/r:s.RELUNITS.hasOwnProperty(a)?o*s.RELUNITS[a]:"%"===a?o/100*e:o*e},s.percent=function(t){return(100*t).toFixed(1).replace(/\.?0+$/,"")+"%"},s.em=function(t){return Math.abs(t)<.001?"0":t.toFixed(3).replace(/\.?0+$/,"")+"em"},s.emRounded=function(t,e){return void 0===e&&(e=16),t=(Math.round(t*e)+.05)/e,Math.abs(t)<.001?"0em":t.toFixed(3).replace(/\.?0+$/,"")+"em"},s.px=function(t,e,r){return void 0===e&&(e=-s.BIGDIMEN),void 0===r&&(r=16),t*=r,e&&tthis.w&&(this.w=i),o>this.h&&(this.h=o),a>this.d&&(this.d=a)},o.prototype.append=function(t){var e=t.rscale;this.w+=e*(t.w+t.L+t.R),e*t.h>this.h&&(this.h=e*t.h),e*t.d>this.d&&(this.d=e*t.d)},o.prototype.updateFrom=function(t){this.h=t.h,this.d=t.d,this.w=t.w,t.pwidth&&(this.pwidth=t.pwidth)},o.fullWidth="100%",o);function o(t){void 0===t&&(t={w:0,h:-n.BIGDIMEN,d:-n.BIGDIMEN}),this.w=t.w||0,this.h="h"in t?t.h:-n.BIGDIMEN,this.d="d"in t?t.d:-n.BIGDIMEN,this.L=this.R=this.ic=this.sk=0,this.scale=this.rscale=1,this.pwidth=""}e.BBox=i},function(t,h,o){"use strict";(function(r){var l=this&&this.__values||function(t){var e="function"==typeof Symbol&&Symbol.iterator,r=e&&t[e],n=0;if(r)return r.call(t);if(t&&"number"==typeof t.length)return{next:function(){return t&&n>=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(h,"__esModule",{value:!0});var t,e,n=o(5),u=o(18),i=o(18);h.Package=i.Package,h.PackageError=i.PackageError,(e=t=h.Loader||(h.Loader={})).ready=function(){for(var e,t,r=[],n=0;n=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var a,p=r(17),s=(a=Error,i(c,a),c);function c(t,e){var r=a.call(this,t)||this;return r.package=e,r}e.PackageError=s;var l=(d.resolvePath=function(t,e){void 0===e&&(e=!0);var 
r,n=p.CONFIG.source[t]||t;for(n.match(/^(?:[a-z]+:\/)?\/|\[/)||(n="[mathjax]/"+n.replace(/^\.\//,"")),e&&!n.match(/\.[^\/]+$/)&&(n+=".js");(r=n.match(/^\[([^\]]*)\]/))&&p.CONFIG.paths.hasOwnProperty(r[1]);)n=p.CONFIG.paths[r[1]]+n.substr(r[0].length);return n},Object.defineProperty(d.prototype,"canLoad",{get:function(){return 0===this.dependencyCount&&!this.noLoad&&!this.isLoading&&!this.hasFailed},enumerable:!0,configurable:!0}),d.prototype.makeDependencies=function(){var e,t,r=[],n=d.packages,i=this.noLoad,o=this.name,a=[];p.CONFIG.dependencies.hasOwnProperty(o)?a.push.apply(a,h(p.CONFIG.dependencies[o])):"core"!==o&&a.push("core");try{for(var s=f(a),c=s.next();!c.done;c=s.next()){var l=c.value,u=n.get(l)||new d(l,i);this.dependencies.indexOf(u)<0&&(u.addDependent(this,i),this.dependencies.push(u),u.isLoaded||(this.dependencyCount++,r.push(u.promise)))}}catch(t){e={error:t}}finally{try{c&&!c.done&&(t=s.return)&&t.call(s)}finally{if(e)throw e.error}}return r},d.prototype.makePromise=function(t){var r=this,e=new Promise(function(t,e){r.resolve=t,r.reject=e}),n=p.CONFIG[this.name]||{};return n.ready&&(e=e.then(function(t){return n.ready(r.name)})),t.length&&(t.push(e),e=Promise.all(t).then(function(t){return t.join(", ")})),n.failed&&e.catch(function(t){return n.failed(new s(t,r.name))}),e},d.prototype.load=function(){if(!this.isLoaded&&!this.isLoading&&!this.noLoad){this.isLoading=!0;var t=d.resolvePath(this.name);p.CONFIG.require?this.loadCustom(t):this.loadScript(t)}},d.prototype.loadCustom=function(t){var e=this;try{var r=p.CONFIG.require(t);r instanceof Promise?r.then(function(){return e.checkLoad()}).catch(function(){return e.failed("Can't load \""+t+'"')}):this.checkLoad()}catch(t){this.failed(t.message)}},d.prototype.loadScript=function(e){var r=this,t=document.createElement("script");t.src=e,t.charset="UTF-8",t.onload=function(t){return r.checkLoad()},t.onerror=function(t){return r.failed("Can't load \""+e+'"')},document.head.appendChild(t)},d.prototype.loaded=function(){var e,t,r,n;this.isLoaded=!0,this.isLoading=!1;try{for(var i=f(this.dependents),o=i.next();!o.done;o=i.next())o.value.requirementSatisfied()}catch(t){e={error:t}}finally{try{o&&!o.done&&(t=i.return)&&t.call(i)}finally{if(e)throw e.error}}try{for(var a=f(this.provided),s=a.next();!s.done;s=a.next())s.value.loaded()}catch(t){r={error:t}}finally{try{s&&!s.done&&(n=a.return)&&n.call(a)}finally{if(r)throw r.error}}this.resolve(this.name)},d.prototype.failed=function(t){this.hasFailed=!0,this.isLoading=!1,this.reject(new s(t,this.name))},d.prototype.checkLoad=function(){var e=this;((p.CONFIG[this.name]||{}).checkReady||function(){return Promise.resolve()})().then(function(){return e.loaded()}).catch(function(t){return e.failed(t)})},d.prototype.requirementSatisfied=function(){this.dependencyCount&&(this.dependencyCount--,this.canLoad&&this.load())},d.prototype.provides=function(t){var e,r;void 0===t&&(t=[]);try{for(var n=f(t),i=n.next();!i.done;i=n.next()){var o=i.value,a=d.packages.get(o);a||(p.CONFIG.dependencies[o]||(p.CONFIG.dependencies[o]=[]),p.CONFIG.dependencies[o].push(o),(a=new d(o,!0)).isLoading=!0),this.provided.push(a)}}catch(t){e={error:t}}finally{try{i&&!i.done&&(r=n.return)&&r.call(n)}finally{if(e)throw e.error}}},d.prototype.addDependent=function(t,e){this.dependents.push(t),e||this.checkNoLoad()},d.prototype.checkNoLoad=function(){var e,t;if(this.noLoad){this.noLoad=!1;try{for(var 
r=f(this.dependencies),n=r.next();!n.done;n=r.next())n.value.checkNoLoad()}catch(t){e={error:t}}finally{try{n&&!n.done&&(t=r.return)&&t.call(r)}finally{if(e)throw e.error}}}},d.loadAll=function(){var e,t;try{for(var r=f(this.packages.values()),n=r.next();!n.done;n=r.next()){var i=n.value;i.canLoad&&i.load()}}catch(t){e={error:t}}finally{try{n&&!n.done&&(t=r.return)&&t.call(r)}finally{if(e)throw e.error}}},d.packages=new Map,d);function d(t,e){void 0===e&&(e=!1),this.isLoaded=!1,this.isLoading=!1,this.hasFailed=!1,this.dependents=[],this.dependencies=[],this.dependencyCount=0,this.provided=[],this.name=t,this.noLoad=e,d.packages.set(t,this),this.promise=this.makePromise(this.makeDependencies())}e.Package=l},function(t,r,e){"use strict";var c=this&&this.__values||function(t){var e="function"==typeof Symbol&&Symbol.iterator,r=e&&t[e],n=0;if(r)return r.call(t);if(t&&"number"==typeof t.length)return{next:function(){return t&&n>=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(r,"__esModule",{value:!0}),r.INHERIT="_inherit_";var n=(i.prototype.set=function(t,e){this.attributes[t]=e},i.prototype.setList=function(t){Object.assign(this.attributes,t)},i.prototype.get=function(t){var e=this.attributes[t];return e===r.INHERIT&&(e=this.global[t]),e},i.prototype.getExplicit=function(t){if(this.attributes.hasOwnProperty(t))return this.attributes[t]},i.prototype.getList=function(){for(var e,t,r=[],n=0;n=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")},s=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0}),e.V=1,e.H=2,e.NOSTRETCH={dir:0};var i=(o.charOptions=function(t,e){var r=t[e];return 3===r.length&&(r[3]={}),r[3]},o.prototype.createVariant=function(t,e,r){void 0===e&&(e=null),void 0===r&&(r=null);var n={linked:[],chars:e?Object.create(this.variant[e].chars):{}};r&&this.variant[r]&&(Object.assign(n.chars,this.variant[r].chars),this.variant[r].linked.push(n.chars),n.chars=Object.create(n.chars)),this.variant[t]=n},o.prototype.createVariants=function(t){var e,r;try{for(var n=c(t),i=n.next();!i.done;i=n.next()){var o=i.value;this.createVariant(o[0],o[1],o[2])}}catch(t){e={error:t}}finally{try{i&&!i.done&&(r=n.return)&&r.call(n)}finally{if(e)throw e.error}}},o.prototype.defineChars=function(t,e){var r,n,i=this.variant[t];Object.assign(i.chars,e);try{for(var o=c(i.linked),a=o.next();!a.done;a=o.next()){var s=a.value;Object.assign(s,e)}}catch(t){r={error:t}}finally{try{a&&!a.done&&(n=o.return)&&n.call(o)}finally{if(r)throw r.error}}},o.prototype.defineDelimiters=function(t){Object.assign(this.delimiters,t)},o.prototype.defineRemap=function(t,e){this.remapChars.hasOwnProperty(t)||(this.remapChars[t]={}),Object.assign(this.remapChars[t],e)},o.prototype.getDelimiter=function(t){return this.delimiters[t]},o.prototype.getSizeVariant=function(t,e){return this.delimiters[t].variants&&(e=this.delimiters[t].variants[e]),this.sizeVariants[e]},o.prototype.getChar=function(t,e){return this.variant[t].chars[e]},o.prototype.getVariant=function(t){return this.variant[t]},o.prototype.getCssFont=function(t){return 
this.cssFontMap[t]||["serif",!1,!1]},o.prototype.getRemappedChar=function(t,e){return(this.remapChars[t]||{})[e]},o.OPTIONS={},o.defaultVariants=[["normal"],["bold","normal"],["italic","normal"],["bold-italic","italic","bold"],["double-struck","bold"],["fraktur","normal"],["bold-fraktur","bold","fraktur"],["script","normal"],["bold-script","bold","script"],["sans-serif","normal"],["bold-sans-serif","bold","sans-serif"],["sans-serif-italic","italic","sans-serif"],["bold-sans-serif-italic","bold-italic","sans-serif"],["monospace","normal"]],o.defaultCssFonts={normal:["serif",!1,!1],bold:["serif",!1,!0],italic:["serif",!0,!1],"bold-italic":["serif",!0,!0],"double-struck":["serif",!1,!0],fraktur:["serif",!1,!1],"bold-fraktur":["serif",!1,!0],script:["cursive",!1,!1],"bold-script":["cursive",!1,!0],"sans-serif":["sans-serif",!1,!1],"bold-sans-serif":["sans-serif",!1,!0],"sans-serif-italic":["sans-serif",!0,!1],"bold-sans-serif-italic":["sans-serif",!0,!0],monospace:["monospace",!1,!1]},o.defaultAccentMap={768:"\u02cb",769:"\u02ca",770:"\u02c6",771:"\u02dc",772:"\u02c9",774:"\u02d8",775:"\u02d9",776:"\xa8",778:"\u02da",780:"\u02c7",8594:"\u20d7",8242:"'",8243:"''",8244:"'''",8245:"`",8246:"``",8247:"```",8279:"''''",8400:"\u21bc",8401:"\u21c0",8406:"\u2190",8417:"\u2194",8432:"*",8411:"...",8412:"....",8428:"\u21c1",8429:"\u21bd",8430:"\u2190",8431:"\u2192"},o.defaultMoMap={45:"\u2212"},o.defaultMnMap={45:"\u2212"},o.defaultParams={x_height:.442,quad:1,num1:.676,num2:.394,num3:.444,denom1:.686,denom2:.345,sup1:.413,sup2:.363,sup3:.289,sub1:.15,sub2:.247,sup_drop:.386,sub_drop:.05,delim1:2.39,delim2:1,axis_height:.25,rule_thickness:.06,big_op_spacing1:.111,big_op_spacing2:.167,big_op_spacing3:.2,big_op_spacing4:.6,big_op_spacing5:.1,surd_height:.075,scriptspace:.05,nulldelimiterspace:.12,delimiterfactor:901,delimitershortfall:.3,min_rule_thickness:1.25},o.defaultDelimiters={},o.defaultChars={},o.defaultSizeVariants=[],o);function o(){var e,t;this.variant={},this.delimiters={},this.cssFontMap={},this.remapChars={};var r=this.constructor;this.params=a({},r.defaultParams),this.sizeVariants=s(r.defaultSizeVariants),this.cssFontMap=a({},r.defaultCssFonts),this.createVariants(r.defaultVariants),this.defineDelimiters(r.defaultDelimiters);try{for(var n=c(Object.keys(r.defaultChars)),i=n.next();!i.done;i=n.next()){var o=i.value;this.defineChars(o,r.defaultChars[o])}}catch(t){e={error:t}}finally{try{i&&!i.done&&(t=n.return)&&t.call(n)}finally{if(e)throw e.error}}this.defineRemap("accent",r.defaultAccentMap),this.defineRemap("mo",r.defaultMoMap),this.defineRemap("mn",r.defaultMnMap)}e.FontData=i},function(t,e,r){"use strict";Object.defineProperty(e,"__esModule",{value:!0});var n=(i.prototype[Symbol.iterator]=function(){var t=0,e=this.items;return{next:function(){return{value:e[t++],done:t>e.length}}}},i.prototype.add=function(t,e){void 0===e&&(e=i.DEFAULTPRIORITY);for(var r=this.items.length;0<=--r&&e=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")},o=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var 
M,a=r(0),s=r(86),c=(M=a.AbstractMmlTokenNode,i(l,M),Object.defineProperty(l.prototype,"kind",{get:function(){return"mo"},enumerable:!0,configurable:!0}),Object.defineProperty(l.prototype,"isEmbellished",{get:function(){return!0},enumerable:!0,configurable:!0}),Object.defineProperty(l.prototype,"hasNewLine",{get:function(){return"newline"===this.attributes.get("linebreak")},enumerable:!0,configurable:!0}),l.prototype.coreParent=function(){for(var t=this,e=this.factory.getNodeClass("math");t&&t.isEmbellished&&t.coreMO()===this&&!(t instanceof e);)t=t.Parent;return t},l.prototype.coreText=function(t){if(!t)return"";if(t.isEmbellished)return t.coreMO().getText();for(;((t.isKind("mrow")||t.isKind("TeXAtom")||t.isKind("mstyle")||t.isKind("mphantom"))&&1===t.childNodes.length||t.isKind("munderover"))&&t.childNodes[0];)t=t.childNodes[0];return t.isToken?t.getText():""},l.prototype.hasSpacingAttributes=function(){return this.attributes.isSet("lspace")||this.attributes.isSet("rspace")},Object.defineProperty(l.prototype,"isAccent",{get:function(){var t=!1,e=this.coreParent();if(e){var r=e.isKind("mover")?e.childNodes[e.over].coreMO()?"accent":"":e.isKind("munder")?e.childNodes[e.under].coreMO()?"accentunder":"":e.isKind("munderover")?this===e.childNodes[e.over].coreMO()?"accent":this===e.childNodes[e.under].coreMO()?"accentunder":"":"";r&&(t=void 0!==e.attributes.getExplicit(r)?t:this.attributes.get("accent"))}return t},enumerable:!0,configurable:!0}),l.prototype.setTeXclass=function(t){var e=this.attributes.getList("form","fence"),r=e.form,n=e.fence;return this.attributes.isSet("lspace")||this.attributes.isSet("rspace")?(this.texClass=a.TEXCLASS.NONE,null):(n&&this.texClass===a.TEXCLASS.REL&&("prefix"===r&&(this.texClass=a.TEXCLASS.OPEN),"postfix"===r&&(this.texClass=a.TEXCLASS.CLOSE)),"\u2061"===this.getText()?(t&&(t.texClass=a.TEXCLASS.OP,t.setProperty("fnOP",!0)),this.texClass=this.prevClass=a.TEXCLASS.NONE,t):this.adjustTeXclass(t))},l.prototype.adjustTeXclass=function(t){var e=this.texClass,r=this.prevClass;if(e===a.TEXCLASS.NONE)return t;if(t?(!t.getProperty("autoOp")||e!==a.TEXCLASS.BIN&&e!==a.TEXCLASS.REL||(e=this.texClass=a.TEXCLASS.ORD),r=this.prevClass=t.texClass||a.TEXCLASS.ORD,this.prevLevel=this.attributes.getInherited("scriptlevel")):r=this.prevClass=a.TEXCLASS.NONE,e!==a.TEXCLASS.BIN||r!==a.TEXCLASS.NONE&&r!==a.TEXCLASS.BIN&&r!==a.TEXCLASS.OP&&r!==a.TEXCLASS.REL&&r!==a.TEXCLASS.OPEN&&r!==a.TEXCLASS.PUNCT)if(r!==a.TEXCLASS.BIN||e!==a.TEXCLASS.REL&&e!==a.TEXCLASS.CLOSE&&e!==a.TEXCLASS.PUNCT){if(e===a.TEXCLASS.BIN){for(var n=this,i=this.parent;i&&i.parent&&i.isEmbellished&&(1===i.childNodes.length||!i.isKind("mrow")&&i.core()===n);)i=(n=i).parent;i.childNodes[i.childNodes.length-1]===n&&(this.texClass=a.TEXCLASS.ORD)}}else t.texClass=this.prevClass=a.TEXCLASS.ORD;else this.texClass=a.TEXCLASS.ORD;return this},l.prototype.setInheritedAttributes=function(t,e,r,n){var i,o;void 0===t&&(t={}),void 0===e&&(e=!1),void 0===r&&(r=0),void 0===n&&(n=!1),M.prototype.setInheritedAttributes.call(this,t,e,r,n);var a=this.getText(),s=b(this.handleExplicitForm(this.getForms()),3),c=s[0],l=s[1],u=s[2];this.attributes.setInherited("form",c);var h=this.constructor.OPTABLE,f=h[c][a]||h[l][a]||h[u][a];if(f){void 0===this.getProperty("texClass")&&(this.texClass=f[2]);try{for(var p=g(Object.keys(f[3]||{})),d=p.next();!d.done;d=p.next()){var m=d.value;this.attributes.setInherited(m,f[3][m])}}catch(t){i={error:t}}finally{try{d&&!d.done&&(o=p.return)&&o.call(p)}finally{if(i)throw 
i.error}}this.lspace=(f[0]+1)/18,this.rspace=(f[1]+1)/18}else{var y=this.getRange(a);if(y){void 0===this.getProperty("texClass")&&(this.texClass=y[2]);var v=this.constructor.MMLSPACING[y[2]];this.lspace=(v[0]+1)/18,this.rspace=(v[1]+1)/18}}},l.prototype.getForms=function(){for(var t=this,e=this.parent,r=this.Parent;r&&r.isEmbellished;)t=e,e=r.parent,r=r.Parent;if(e&&e.isKind("mrow")&&1!==e.nonSpaceLength()){if(e.firstNonSpace()===t)return["prefix","infix","postfix"];if(e.lastNonSpace()===t)return["postfix","infix","prefix"]}return["infix","prefix","postfix"]},l.prototype.handleExplicitForm=function(t){if(this.attributes.isSet("form")){var e=this.attributes.get("form");t=[e].concat(t.filter(function(t){return t!==e}))}return t},l.prototype.getRange=function(t){var e,r;if(!t.match(/^[\uD800-\uDBFF]?.$/))return null;var n=t.charCodeAt(0);2===t.length&&(n=1024*(n-55296)+t.charCodeAt(1)-56320+65536);var i=this.constructor.RANGES;try{for(var o=g(i),a=o.next();!a.done;a=o.next()){var s=a.value;if(s[0]<=n&&n<=s[1])return s;if(n=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var o=r(21),s=function(t,e){void 0===t&&(t="???"),void 0===e&&(e=""),this.tag=t,this.id=e};e.Label=s;var c=function(t,e,r,n,i,o,a,s){void 0===t&&(t=""),void 0===e&&(e=!1),void 0===r&&(r=!1),void 0===n&&(n=null),void 0===i&&(i=""),void 0===o&&(o=""),void 0===a&&(a=!1),void 0===s&&(s=""),this.env=t,this.taggable=e,this.defaultTags=r,this.tag=n,this.tagId=i,this.tagFormat=o,this.noTag=a,this.labelId=s};e.TagInfo=c;var l=(u.prototype.start=function(t,e,r){this.currentTag&&this.stack.push(this.currentTag),this.currentTag=new c(t,e,r)},Object.defineProperty(u.prototype,"env",{get:function(){return this.currentTag.env},enumerable:!0,configurable:!0}),u.prototype.end=function(){this.history.push(this.currentTag),this.currentTag=this.stack.pop()},u.prototype.tag=function(t,e){this.currentTag.tag=t,this.currentTag.tagFormat=e?t:this.formatTag(t),this.currentTag.noTag=!1},u.prototype.notag=function(){this.tag("",!0),this.currentTag.noTag=!0},Object.defineProperty(u.prototype,"noTag",{get:function(){return this.currentTag.noTag},enumerable:!0,configurable:!0}),Object.defineProperty(u.prototype,"label",{get:function(){return this.currentTag.labelId},set:function(t){this.currentTag.labelId=t},enumerable:!0,configurable:!0}),u.prototype.formatUrl=function(t,e){return e+"#"+encodeURIComponent(t)},u.prototype.formatTag=function(t){return"("+t+")"},u.prototype.formatId=function(t){return"mjx-eqn-"+t.replace(/\s/g,"_")},u.prototype.formatNumber=function(t){return t.toString()},u.prototype.autoTag=function(){null==this.currentTag.tag&&(this.counter++,this.tag(this.formatNumber(this.counter),!1))},u.prototype.clearTag=function(){this.label="",this.tag(null,!0),this.currentTag.tagId=""},u.prototype.getTag=function(t){if(void 0===t&&(t=!1),t)return this.autoTag(),this.makeTag();var e=this.currentTag;return e.taggable&&!e.noTag&&(e.defaultTags&&this.autoTag(),e.tag)?this.makeTag():null},u.prototype.resetTag=function(){this.history=[],this.redo=!1,this.refUpdate=!1,this.clearTag()},u.prototype.reset=function(t){void 0===t&&(t=0),this.resetTag(),this.counter=this.allCounter=t,this.allLabels={},this.allIds={}},u.prototype.startEquation=function(t){this.labels={},this.ids={},this.counter=this.allCounter,this.redo=!1;var 
e=t.inputData.recompile;e&&(this.refUpdate=!0,this.counter=e.counter)},u.prototype.finishEquation=function(t){this.redo&&(t.inputData.recompile={state:t.state(),counter:this.allCounter}),this.refUpdate||(this.allCounter=this.counter),Object.assign(this.allIds,this.ids),Object.assign(this.allLabels,this.labels)},u.prototype.finalize=function(t,e){if(!e.display||this.currentTag.env||null==this.currentTag.tag)return t;var r=this.makeTag();return this.enTag(t,r)},u.prototype.makeId=function(){this.currentTag.tagId=this.formatId(this.configuration.options.useLabelIds&&this.label||this.currentTag.tag)},u.prototype.makeTag=function(){this.makeId(),this.label&&(this.labels[this.label]=new s(this.currentTag.tag,this.currentTag.tagId));var t=new o.default("\\text{"+this.currentTag.tagFormat+"}",{},this.configuration).mml();return this.configuration.nodeFactory.create("node","mtd",[t],{id:this.currentTag.tagId})},u);function u(){this.counter=0,this.allCounter=0,this.configuration=null,this.ids={},this.allIds={},this.labels={},this.allLabels={},this.redo=!1,this.refUpdate=!1,this.currentTag=new c,this.history=[],this.stack=[],this.enTag=function(t,e){var r=this.configuration.nodeFactory,n=r.create("node","mtd",[t]),i=r.create("node","mlabeledtr",[e,n]);return r.create("node","mtable",[i],{side:this.configuration.options.tagSide,minlabelspacing:this.configuration.options.tagIndent,displaystyle:!0})}}e.AbstractTags=l;var h,f=(i(p,h=l),p.prototype.autoTag=function(){},p.prototype.getTag=function(){return this.currentTag.tag?h.prototype.getTag.call(this):null},p);function p(){return null!==h&&h.apply(this,arguments)||this}e.NoTags=f;var d,m,y,v,b=(i(g,d=l),g.prototype.finalize=function(t,e){if(!e.display||this.history.find(function(t){return t.taggable}))return t;var r=this.getTag(!0);return this.enTag(t,r)},g);function g(){return null!==d&&d.apply(this,arguments)||this}e.AllTags=b,m=e.TagsFactory||(e.TagsFactory={}),y=new Map([["none",f],["all",b]]),v="none",m.OPTIONS={tags:v,tagSide:"right",tagIndent:"0.8em",multlineWidth:"85%",useLabelIds:!0,ignoreDuplicateLabels:!1},m.add=function(t,e){y.set(t,e)},m.addTags=function(t){var e,r;try{for(var n=a(Object.keys(t)),i=n.next();!i.done;i=n.next()){var o=i.value;m.add(o,t[o])}}catch(t){e={error:t}}finally{try{i&&!i.done&&(r=n.return)&&r.call(n)}finally{if(e)throw e.error}}},m.create=function(t){return new(y.get(t)||this.defaultTags)},m.setDefault=function(t){v=t},m.getDefault=function(){return m.create(v)}},function($K,_K){var aL;aL=function(){return this}();try{aL=aL||Function("return this")()||eval("this")}catch(t){"object"==typeof window&&(aL=window)}$K.exports=aL},function(t,e,r){"use strict";Object.defineProperty(e,"__esModule",{value:!0});var n=r(3),i=r(25),o=(Object.defineProperty(a.prototype,"name",{get:function(){return this.constructor.NAME},enumerable:!0,configurable:!0}),a.prototype.setAdaptor=function(t){this.adaptor=t},a.prototype.setMmlFactory=function(t){this.mmlFactory=t},a.prototype.initialize=function(){},Object.defineProperty(a.prototype,"processStrings",{get:function(){return!0},enumerable:!0,configurable:!0}),a.prototype.findMath=function(t,e){return[]},a.prototype.executeFilters=function(t,e,r,n){var i={math:e,document:r,data:n};return t.execute(i),i.data},a.NAME="generic",a.OPTIONS={},a);function a(t){void 0===t&&(t={}),this.adaptor=null,this.mmlFactory=null;var e=this.constructor;this.options=n.userOptions(n.defaultOptions({},e.OPTIONS),t),this.preFilters=new i.FunctionList,this.postFilters=new 
i.FunctionList}e.AbstractInputJax=o},function(t,e,r){"use strict";var a=this&&this.__values||function(t){var e="function"==typeof Symbol&&Symbol.iterator,r=e&&t[e],n=0;if(r)return r.call(t);if(t&&"number"==typeof t.length)return{next:function(){return t&&n>=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")},n=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var c=r(4),l=(Object.defineProperty(u.prototype,"nodes",{get:function(){return this._nodes},enumerable:!0,configurable:!0}),u.prototype.Push=function(){for(var t,e=[],r=0;rt.configuration.options.maxMacros)throw new d.default("MaxMacroSub2","MathJax maximum substitution count exceeded; is there a recursive latex environment?");t.parse("environment",[t,r])},i.Array=function(t,e,r,n,i,o,a,s,c){var l=("c"+(i=i||t.GetArgument("\\begin{"+e.getName()+"}"))).replace(/[^clr|:]/g,"").replace(/[^|:]([|:])+/g,"$1");i=(i=i.replace(/[^clr]/g,"").split("").join(" ")).replace(/l/g,"left").replace(/r/g,"right").replace(/c/g,"center");var u=t.itemFactory.create("array");return u.arraydef={columnalign:i,columnspacing:o||"1em",rowspacing:a||"4pt"},l.match(/[|:]/)&&(l.charAt(0).match(/[|:]/)&&(u.frame.push("left"),u.dashed=":"===l.charAt(0)),l.charAt(l.length-1).match(/[|:]/)&&u.frame.push("right"),l=l.substr(1,l.length-2),u.arraydef.columnlines=l.split("").join(" ").replace(/[^|: ]/g,"none").replace(/\|/g,"solid").replace(/:/g,"dashed")),r&&u.setProperty("open",t.convertDelimiter(r)),n&&u.setProperty("close",t.convertDelimiter(n)),"D"===s?u.arraydef.displaystyle=!0:s&&(u.arraydef.displaystyle=!1),"S"===s&&(u.arraydef.scriptlevel=1),c&&(u.arraydef.useHeight=!1),t.Push(e),u},i.AlignedArray=function(t,e){var r=t.GetBrackets("\\begin{"+e.getName()+"}"),n=i.Array(t,e);return y.default.setArrayAlign(n,r)},i.Equation=function(t,e,r){return t.Push(e),y.default.checkEqnEnv(t),t.itemFactory.create("equation",r).setProperty("name",e.getName())},i.EqnArray=function(t,e,r,n,i,o){t.Push(e),n&&y.default.checkEqnEnv(t),i=(i=i.replace(/[^clr]/g,"").split("").join(" ")).replace(/l/g,"left").replace(/r/g,"right").replace(/c/g,"center");var a=t.itemFactory.create("eqnarray",e.getName(),r,n,t.stack.global);return a.arraydef={displaystyle:!0,columnalign:i,columnspacing:o||"1em",rowspacing:"3pt",side:t.options.tagSide,minlabelspacing:t.options.tagIndent},a},i.HandleNoTag=function(t,e){t.tags.notag()},i.HandleLabel=function(t,e){t.stack.global;var r=t.GetArgument(e);if(""!==r&&!t.tags.refUpdate){if(t.tags.label)throw new d.default("MultipleCommand","Multiple %1",t.currentCS);if(t.tags.label=r,(t.tags.allLabels[r]||t.tags.labels[r])&&!t.options.ignoreDuplicateLabels)throw new d.default("MultipleLabel","Label '%1' multiply defined",r);t.tags.labels[r]=new s.Label}},i.HandleRef=function(t,e,r){var n=t.GetArgument(e),i=t.tags.allLabels[n]||t.tags.labels[n];i||(t.tags.refUpdate||(t.tags.redo=!0),i=new s.Label);var o=i.tag;r&&(o=t.tags.formatTag(o));var a=t.create("node","mrow",y.default.internalMath(t,o),{href:t.tags.formatUrl(i.id,t.options.baseURL),class:"MathJax_ref"});t.Push(a)},i.Macro=function(t,e,r,n,i){if(n){var o=[];if(null!=i){var a=t.GetBrackets(e);o.push(null==a?i:a)}for(var 
s=o.length;st.configuration.options.maxMacros)throw new d.default("MaxMacroSub1","MathJax maximum macro substitution count exceeded; is there a recursive macro call?")},i.MathChoice=function(t,e){var r=t.ParseArg(e),n=t.ParseArg(e),i=t.ParseArg(e),o=t.ParseArg(e);t.Push(t.create("node","mathchoice",[r,n,i,o]))},e.default=i},function(t,p,e){"use strict";var d=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0Math.PI/2-r?t.thickness*h*Math.sin(u+r-Math.PI/2):0);return[f,p,f,p]},remove:e[3]}]}},p.CommonArrow=function(f){return function(t){var e=d(p.arrowDef[t],4),l=e[0],u=e[1],h=e[2],r=e[3];return[t+"arrow",{renderer:function(t,e){var r=t.getBBox(),n=r.w,i=r.h,o=r.d,a=d(h?[i+o,n]:[n,i+o],2),s=a[0],c=(a[1],t.arrow(s,l,u));f(t,c)},bbox:p.arrowBBox[t],remove:r}]}}},function(t,e,r){"use strict";var n,i=this&&this.__extends||(n=function(t,e){return(n=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(t,e){t.__proto__=e}||function(t,e){for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r])})(t,e)},function(t,e){function r(){this.constructor=t}n(t,e),t.prototype=null===e?Object.create(e):(r.prototype=e.prototype,new r)}),h=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0 *":{display:"block"}},g.useIC=!1,g);function g(){return null!==v&&v.apply(this,arguments)||this}e.CHTMLmsubsup=b},function(t,e,r){"use strict";var n,i=this&&this.__extends||(n=function(t,e){return(n=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(t,e){t.__proto__=e}||function(t,e){for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r])})(t,e)},function(t,e){function r(){this.constructor=t}n(t,e),t.prototype=null===e?Object.create(e):(r.prototype=e.prototype,new r)}),f=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")},p=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0=e&&a.item.renderDoc(t))return}}catch(t){r={error:t}}finally{try{o&&!o.done&&(n=i.return)&&n.call(i)}finally{if(r)throw r.error}}},v.prototype.renderMath=function(t,e,r){var n,i;void 0===r&&(r=m.STATE.UNPROCESSED);try{for(var o=h(this.items),a=o.next();!a.done;a=o.next()){var s=a.value;if(s.priority>=r&&s.item.renderMath(t,e))return}}catch(t){n={error:t}}finally{try{a&&!a.done&&(i=o.return)&&i.call(o)}finally{if(n)throw n.error}}},v.prototype.renderConvert=function(t,e,r){var n,i;void 0===r&&(r=m.STATE.LAST);try{for(var o=h(this.items),a=o.next();!a.done;a=o.next()){var s=a.value;if(s.priority>=r)return;if(s.item.convert&&s.item.renderMath(t,e))return}}catch(t){n={error:t}}finally{try{a&&!a.done&&(i=o.return)&&i.call(o)}finally{if(n)throw n.error}}},v.prototype.findID=function(t){var e,r;try{for(var n=h(this.items),i=n.next();!i.done;i=n.next()){var o=i.value;if(o.item.id===t)return o.item}}catch(t){e={error:t}}finally{try{i&&!i.done&&(r=n.return)&&r.call(n)}finally{if(e)throw e.error}}return null},v);function v(){return null!==o&&o.apply(this,arguments)||this}e.RenderList=y;var b,g=(b=a.AbstractInputJax,i(M,b),M.prototype.compile=function(t){return null},M);function M(){return null!==b&&b.apply(this,arguments)||this}var 
f(t){void 0===t&&(t=null);var e=s.call(this,t,l.CHTMLWrapperFactory,u.TeXFont)||this;return e.font.adaptiveCSS(e.options.adaptiveCSS),e}e.CHTML=h},function(t,e,r){"use strict";var n,i=this&&this.__extends||(n=function(t,e){return(n=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(t,e){t.__proto__=e}||function(t,e){for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r])})(t,e)},function(t,e){function r(){this.constructor=t}n(t,e),t.prototype=null===e?Object.create(e):(r.prototype=e.prototype,new r)}),o=this&&this.__assign||function(){return(o=Object.assign||function(t){for(var e,r=1,n=arguments.length;r=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var c,a=r(42),l=r(3),u=r(124),h=r(14),f=r(71),p=(c=a.AbstractOutputJax,i(d,c),d.prototype.typeset=function(t,e){this.setDocument(e);var r=this.createNode();return this.toDOM(t,r,e),r},d.prototype.createNode=function(){var t=this.constructor.NAME;return this.html("mjx-container",{class:"MathJax",jax:t})},d.prototype.setScale=function(t){var e=this.math.metrics.scale*this.options.scale;1!=e&&this.adaptor.setStyle(t,"fontSize",h.percent(e))},d.prototype.toDOM=function(t,e,r){void 0===r&&(r=null),this.setDocument(r),this.math=t,this.pxPerEm=t.metrics.ex/this.font.params.x_height,t.root.setTeXclass(null),this.setScale(e),this.nodeMap=new Map,this.container=e,this.processMath(t.root,e),this.nodeMap=null,this.executeFilters(this.postFilters,t,r,e)},d.prototype.getBBox=function(t,e){this.setDocument(e),(this.math=t).root.setTeXclass(null),this.nodeMap=new Map;var r=this.factory.wrap(t.root).getBBox();return this.nodeMap=null,r},d.prototype.getMetrics=function(t){var e,r;this.setDocument(t);var n=this.adaptor,i=this.getMetricMaps(t);try{for(var o=w(t.math),a=o.next();!a.done;a=o.next()){var s=a.value,c=i[s.display?1:0].get(n.parent(s.start.node)),l=c.em,u=c.ex,h=c.containerWidth,f=c.lineWidth,p=c.scale;s.setMetrics(l,u,h,f,p)}}catch(t){e={error:t}}finally{try{a&&!a.done&&(r=o.return)&&r.call(o)}finally{if(e)throw e.error}}},d.prototype.getMetricsFor=function(t,e){var r=this.getTestElement(t,e),n=this.measureMetrics(r);return this.adaptor.remove(r),n},d.prototype.getMetricMaps=function(t){var e,r,n,i,o,a,s,c,l,u,h=this.adaptor,f=[new Map,new Map];try{for(var p=w(t.math),d=p.next();!d.done;d=p.next()){var m=d.value,y=h.parent(m.start.node),v=f[m.display?1:0];v.has(y)||v.set(y,this.getTestElement(y,m.display))}}catch(t){e={error:t}}finally{try{d&&!d.done&&(r=p.return)&&r.call(p)}finally{if(e)throw e.error}}var b=[new Map,new Map];try{for(var g=w(b.keys()),M=g.next();!M.done;M=g.next()){var O=M.value;try{for(var x=(o=void 0,w(f[O].keys())),S=x.next();!S.done;S=x.next())y=S.value,b[O].set(y,this.measureMetrics(f[O].get(y)))}catch(t){o={error:t}}finally{try{S&&!S.done&&(a=x.return)&&a.call(x)}finally{if(o)throw o.error}}}}catch(t){n={error:t}}finally{try{M&&!M.done&&(i=g.return)&&i.call(g)}finally{if(n)throw n.error}}try{for(var E=w(b.keys()),C=E.next();!C.done;C=E.next()){O=C.value;try{for(var _=(l=void 0,w(f[O].values())),T=_.next();!T.done;T=_.next())y=T.value,h.remove(y)}catch(t){l={error:t}}finally{try{T&&!T.done&&(u=_.return)&&u.call(_)}finally{if(l)throw l.error}}}}catch(t){s={error:t}}finally{try{C&&!C.done&&(c=E.return)&&c.call(E)}finally{if(s)throw s.error}}return b},d.prototype.getTestElement=function(t,e){var 
r=this.adaptor;if(!this.testInline){this.testInline=this.html("mjx-test",{style:{display:"inline-block",width:"100%","font-style":"normal","font-weight":"normal","font-size":"100%","font-size-adjust":"none","text-indent":0,"text-transform":"none","letter-spacing":"normal","word-spacing":"normal",overflow:"hidden",height:"1px","margin-right":"-1px"}},[this.html("mjx-left-box",{style:{display:"inline-block",width:0,float:"left"}}),this.html("mjx-ex-box",{style:{position:"absolute",overflow:"hidden",width:"1px",height:"60ex"}}),this.html("mjx-right-box",{style:{display:"inline-block",width:0,float:"right"}})]),this.testDisplay=r.clone(this.testInline),r.setStyle(this.testDisplay,"display","table"),r.setStyle(this.testDisplay,"margin-right",""),r.setStyle(r.firstChild(this.testDisplay),"display","none");var n=r.lastChild(this.testDisplay);r.setStyle(n,"display","table-cell"),r.setStyle(n,"width","10000em"),r.setStyle(n,"float","")}return r.append(t,r.clone(e?this.testDisplay:this.testInline))},d.prototype.measureMetrics=function(t){var e=this.adaptor,r=e.fontSize(t),n=e.nodeSize(e.childNode(t,1))[1]/60||r*this.options.exFactor;return{em:r,ex:n,containerWidth:"table"===e.getStyle(t,"display")?e.nodeSize(e.lastChild(t))[0]-1:e.nodeBBox(e.lastChild(t)).left-e.nodeBBox(e.firstChild(t)).left-2,lineWidth:1e6,scale:Math.max(this.options.minScale,this.options.matchFontHeight?n/this.font.params.x_height/r:1)}},d.prototype.styleSheet=function(t){var e,r;this.setDocument(t),this.cssStyles.clear(),this.cssStyles.addStyles(this.constructor.commonStyles);try{for(var n=w(this.factory.getKinds()),i=n.next();!i.done;i=n.next()){var o=i.value;this.addClassStyles(this.factory.getNodeClass(o))}}catch(t){e={error:t}}finally{try{i&&!i.done&&(r=n.return)&&r.call(n)}finally{if(e)throw e.error}}return this.cssStyles.addStyles(this.font.styles),this.html("style",{id:"MJX-styles"},[this.text("\n"+this.cssStyles.cssText+"\n")])},d.prototype.addClassStyles=function(t){this.cssStyles.addStyles(t.styles)},d.prototype.setDocument=function(t){t&&(this.document=t,this.adaptor.document=t.document)},d.prototype.html=function(t,e,r,n){return void 0===e&&(e={}),void 0===r&&(r=[]),this.adaptor.node(t,e,r,n)},d.prototype.text=function(t){return this.adaptor.text(t)},d.prototype.fixed=function(t,e){return void 0===e&&(e=3),Math.abs(t)<6e-4?"0":t.toFixed(e).replace(/\.?0+$/,"")},d.prototype.measureText=function(t,e,r){void 0===r&&(r=["",!1,!1]);var n=this.unknownText(t,e);if("-explicitFont"===e){var i=this.cssFontStyles(r);this.adaptor.setAttributes(n,{style:i})}return this.measureTextNodeWithCache(n,t,e,r)},d.prototype.measureTextNodeWithCache=function(t,e,r,n){void 0===n&&(n=["",!1,!1]),"-explicitFont"===r&&(r=[n[0],n[1]?"T":"F",n[2]?"T":"F",""].join("-")),this.unknownCache.has(r)||this.unknownCache.set(r,new Map);var i=this.unknownCache.get(r),o=i.get(e);if(o)return o;var a=this.measureTextNode(t);return i.set(e,a),a},d.prototype.cssFontStyles=function(t,e){void 0===e&&(e={});var r=s(t,3),n=r[0],i=r[1],o=r[2];return e["font-family"]=n,i&&(e["font-style"]="italic"),o&&(e["font-weight"]="bold"),e},d.prototype.getFontData=function(t){return[(t=t||new 
f.Styles).get("font-family"),"italic"===t.get("font-style"),"bold"===t.get("font-weight")]},d.NAME="Common",d.OPTIONS=o(o({},a.AbstractOutputJax.OPTIONS),{scale:1,minScale:.5,matchFontHeight:!0,mtextInheritFont:!1,merrorInheritFont:!0,mathmlSpacing:!1,skipAttributes:{},exFactor:.5,displayAlign:"center",displayIndent:"0",wrapperFactory:null,font:null,cssStyles:null}),d.commonStyles={},d);function d(t,e,r){void 0===t&&(t=null),void 0===e&&(e=null),void 0===r&&(r=null);var n=this,i=s(l.separateOptions(t,r.OPTIONS),2),o=i[0],a=i[1];return(n=c.call(this,o)||this).factory=n.options.wrapperFactory||new e,(n.factory.jax=n).cssStyles=n.options.cssStyles||new u.CssStyles,n.font=n.options.font||new r(a),n.unknownCache=new Map,n}e.CommonOutputJax=p},function(t,e,r){"use strict";var l=this&&this.__values||function(t){var e="function"==typeof Symbol&&Symbol.iterator,r=e&&t[e],n=0;if(r)return r.call(t);if(t&&"number"==typeof t.length)return{next:function(){return t&&n>=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var n=(Object.defineProperty(i.prototype,"cssText",{get:function(){return this.getStyleString()},enumerable:!0,configurable:!0}),i.prototype.addStyles=function(t){var e,r;if(t)try{for(var n=l(Object.keys(t)),i=n.next();!i.done;i=n.next()){var o=i.value;this.styles[o]||(this.styles[o]={}),Object.assign(this.styles[o],t[o])}}catch(t){e={error:t}}finally{try{i&&!i.done&&(r=n.return)&&r.call(n)}finally{if(e)throw e.error}}},i.prototype.removeStyles=function(){for(var e,t,r=[],n=0;n=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")},o=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var f,o=r(2),d=r(135),a=r(26),p=r(129),s=(f=d.CommonMoMixin(o.CHTMLWrapper),i(c,f),c.prototype.toCHTML=function(t){var e,r,n=this.node.attributes,i=n.get("symmetric")&&2!==this.stretch.dir,o=0!==this.stretch.dir;o&&null===this.size&&this.getStretchedVariant([]);var a=this.standardCHTMLnode(t);if(this.noIC&&this.adaptor.setAttribute(a,"noIC","true"),o&&this.size<0)this.stretchHTML(a,i);else{if(i||n.get("largeop")){var s=p.BBox.empty();f.prototype.computeBBox.call(this,s);var c=this.em((s.d-s.h)/2+this.font.params.axis_height);"0"!==c&&this.adaptor.setStyle(a,"verticalAlign",c)}try{for(var l=h(this.childNodes),u=l.next();!u.done;u=l.next())u.value.toCHTML(a)}catch(t){e={error:t}}finally{try{u&&!u.done&&(r=l.return)&&r.call(l)}finally{if(e)throw e.error}}}},c.prototype.stretchHTML=function(t,e){var r=this.getText().charCodeAt(0),n=this.stretch;n.used=!0;var i=n.stretch,o=[];i[0]&&o.push(this.html("mjx-beg",{},[this.html("mjx-c")])),o.push(this.html("mjx-ext",{},[this.html("mjx-c")])),4===i.length&&o.push(this.html("mjx-mid",{},[this.html("mjx-c")]),this.html("mjx-ext",{},[this.html("mjx-c")])),i[2]&&o.push(this.html("mjx-end",{},[this.html("mjx-c")]));var a={},s=this.bbox,c=s.h,l=s.d,u=s.w;1===n.dir?(o.push(this.html("mjx-mark")),a.height=this.em(c+l),a.verticalAlign=this.em(-l)):a.width=this.em(u);var 
h=d.DirectionVH[n.dir],f={class:this.char(n.c||r),style:a},p=this.html("mjx-stretchy-"+h,f,o);this.adaptor.append(t,p)},c.kind=a.MmlMo.prototype.kind,c.styles={"mjx-stretchy-h":{display:"inline-table",width:"100%"},"mjx-stretchy-h > *":{display:"table-cell",width:0},"mjx-stretchy-h > * > mjx-c":{display:"inline-block"},"mjx-stretchy-h > * > mjx-c::before":{padding:".001em 0",width:"initial"},"mjx-stretchy-h > mjx-ext":{overflow:"hidden",width:"100%"},"mjx-stretchy-h > mjx-ext > mjx-c::before":{transform:"scalex(500)"},"mjx-stretchy-h > mjx-ext > mjx-c":{width:0},"mjx-stretchy-h > mjx-beg > mjx-c":{"margin-right":"-.1em"},"mjx-stretchy-h > mjx-end > mjx-c":{"margin-left":"-.1em"},"mjx-stretchy-v":{display:"inline-block"},"mjx-stretchy-v > *":{display:"block"},"mjx-stretchy-v > mjx-beg":{height:0},"mjx-stretchy-v > mjx-end > mjx-c":{display:"block"},"mjx-stretchy-v > * > mjx-c":{transform:"scale(1)","transform-origin":"left center",overflow:"hidden"},"mjx-stretchy-v > mjx-ext":{display:"block",height:"100%","box-sizing":"border-box",border:"0px solid transparent",overflow:"hidden"},"mjx-stretchy-v > mjx-ext > mjx-c::before":{width:"initial"},"mjx-stretchy-v > mjx-ext > mjx-c":{transform:"scaleY(500) translateY(.1em)",overflow:"visible"},"mjx-mark":{display:"inline-block",height:"0px"}},c);function c(){return null!==f&&f.apply(this,arguments)||this}e.CHTMLmo=s},function(t,e,r){"use strict";var n,i,o=this&&this.__extends||(n=function(t,e){return(n=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(t,e){t.__proto__=e}||function(t,e){for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r])})(t,e)},function(t,e){function r(){this.constructor=t}n(t,e),t.prototype=null===e?Object.create(e):(r.prototype=e.prototype,new r)}),m=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var s=r(23);e.DirectionVH=((i={})[1]="v",i[2]="h",i),e.CommonMoMixin=function(t){return o(e,i=t),e.prototype.computeBBox=function(t,e){void 0===e&&(e=!1);var r=0!==this.stretch.dir;if(r&&null===this.size&&this.getStretchedVariant([0]),!(r&&this.size<0)&&(i.prototype.computeBBox.call(this,t),this.copySkewIC(t),this.noIC&&(t.w-=t.ic),this.node.attributes.get("symmetric")&&2!==this.stretch.dir)){var n=(t.h+t.d)/2+this.font.params.axis_height-t.h;t.h+=n,t.d-=n}},e.prototype.getVariant=function(){this.node.attributes.get("largeop")?this.variant=this.node.attributes.get("displaystyle")?"-largeop":"-smallop":i.prototype.getVariant.call(this)},e.prototype.canStretch=function(t){if(0!==this.stretch.dir)return this.stretch.dir===t;if(!this.node.attributes.get("stretchy"))return!1;var e=this.getText();if(1!==e.length)return!1;var r=this.font.getDelimiter(e.charCodeAt(0));return this.stretch=r&&r.dir===t?r:s.NOSTRETCH,0!==this.stretch.dir},e.prototype.getStretchedVariant=function(t,e){var r,n;if(void 0===e&&(e=!1),0!==this.stretch.dir){var i=this.getWH(t),o=this.getSize("minsize",0),a=this.getSize("maxsize",1/0);i=Math.max(o,Math.min(a,i));var s=o||e?i:Math.max(i*this.font.params.delimiterfactor/1e3,i-this.font.params.delimitershortfall),c=this.stretch,l=c.c||this.getText().charCodeAt(0),u=0;if(c.sizes)try{for(var h=p(c.sizes),f=h.next();!f.done;f=h.next()){if(s<=f.value)return 
this.variant=this.font.getSizeVariant(l,u),void(this.size=u);u++}}catch(t){r={error:t}}finally{try{f&&!f.done&&(n=h.return)&&n.call(h)}finally{if(r)throw r.error}}c.stretch?(this.size=-1,this.invalidateBBox(),this.getStretchBBox(t,i,c)):(this.variant=this.font.getSizeVariant(l,u-1),this.size=u-1)}},e.prototype.getSize=function(t,e){var r=this.node.attributes;return r.isSet(t)&&(e=this.length2em(r.get(t),1,1)),e},e.prototype.getWH=function(t){if(0===t.length)return 0;if(1===t.length)return t[0];var e=m(t,2),r=e[0],n=e[1],i=this.font.params.axis_height;return this.node.attributes.get("symmetric")?2*Math.max(r-i,n+i):r+n},e.prototype.getStretchBBox=function(t,e,r){var n;r.hasOwnProperty("min")&&r.min>e&&(e=r.min);var i=m(r.HDW,3),o=i[0],a=i[1],s=i[2];1===this.stretch.dir?(o=(n=m(this.getBaseline(t,e,r),2))[0],a=n[1]):s=e,this.bbox.h=o,this.bbox.d=a,this.bbox.w=s},e.prototype.getBaseline=function(t,e,r){var n=2===t.length&&t[0]+t[1]===e,i=this.node.attributes.get("symmetric"),o=m(n?t:[e,0],2),a=o[0],s=o[1],c=m([a+s,0],2),l=c[0],u=c[1];if(i){var h=this.font.params.axis_height;n&&(l=2*Math.max(a-h,s+h)),u=l/2-h}else if(n)u=s;else{var f=m(r.HDW||[.75,.25],2),p=f[0],d=f[1];u=d*(l/(p+d))}return[l-u,u]},e.prototype.remapChars=function(t){if(1==t.length){var e=this.node.parent,r=this.isAccent&&(e===this.node.coreParent()||e.isEmbellished)?"accent":"mo",n=this.font.getRemappedChar(r,t[0]);n&&(t=this.unicodeChars(n))}return t},e;function e(){for(var t=[],e=0;e=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var o,a=r(2),s=r(145),c=r(56),l=(o=s.CommonMpaddedMixin(a.CHTMLWrapper),i(u,o),u.prototype.toCHTML=function(t){var e,r,n=this.standardCHTMLnode(t),i=[],o={},a=v(this.getDimens(),9),s=(a[0],a[1],a[2]),c=a[3],l=a[4],u=a[5],h=a[6],f=a[7],p=a[8];if(u&&(o.width=this.em(s+u)),(c||l)&&(o.margin=this.em(c)+" 0 "+this.em(l)),h+p||f){o.position="relative";var d=this.html("mjx-rbox",{style:{left:this.em(h+p),top:this.em(-f)}});h+p&&this.childNodes[0].getBBox().pwidth&&(this.adaptor.setAttribute(d,"width","full"),this.adaptor.setStyle(d,"left",this.em(h))),i.push(d)}n=this.adaptor.append(n,this.html("mjx-block",{style:o},i));try{for(var m=b(this.childNodes),y=m.next();!y.done;y=m.next())y.value.toCHTML(i[0]||n)}catch(t){e={error:t}}finally{try{y&&!y.done&&(r=m.return)&&r.call(m)}finally{if(e)throw e.error}}},u.kind=c.MmlMpadded.prototype.kind,u.styles={"mjx-mpadded":{display:"inline-block"},"mjx-rbox":{display:"inline-block",position:"relative"}},u);function u(){return null!==o&&o.apply(this,arguments)||this}e.CHTMLmpadded=l},function(t,e,r){"use strict";var n,i=this&&this.__extends||(n=function(t,e){return(n=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(t,e){t.__proto__=e}||function(t,e){for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r])})(t,e)},function(t,e){function r(){this.constructor=t}n(t,e),t.prototype=null===e?Object.create(e):(r.prototype=e.prototype,new r)}),l=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")},m=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0 
mjx-dstrike":{display:"inline-block",left:0,top:0,position:"absolute","border-top":b.SOLID,"transform-origin":"top left"},"mjx-menclose > mjx-ustrike":{display:"inline-block",left:0,bottom:0,position:"absolute","border-top":b.SOLID,"transform-origin":"bottom left"},"mjx-menclose > mjx-hstrike":{"border-top":b.SOLID,position:"absolute",left:0,right:0,bottom:"50%",transform:"translateY("+c.em(b.THICKNESS/2)+")"},"mjx-menclose > mjx-vstrike":{"border-left":b.SOLID,position:"absolute",top:0,bottom:0,right:"50%",transform:"translateX("+c.em(b.THICKNESS/2)+")"},"mjx-menclose > mjx-rbox":{position:"absolute",top:0,bottom:0,right:0,left:0,border:b.SOLID,"border-radius":c.em(b.THICKNESS+b.PADDING)},"mjx-menclose > mjx-cbox":{position:"absolute",top:0,bottom:0,right:0,left:0,border:b.SOLID,"border-radius":"50%"},"mjx-menclose > mjx-arrow":{position:"absolute",left:0,bottom:"50%",height:0,width:0},"mjx-menclose > mjx-arrow > *":{display:"block",position:"absolute","transform-origin":"bottom","border-left":c.em(b.THICKNESS*b.ARROWX)+" solid","border-right":0,"box-sizing":"border-box"},"mjx-menclose > mjx-arrow > mjx-aline":{left:0,top:c.em(-b.THICKNESS/2),right:c.em(b.THICKNESS*(b.ARROWX-1)),height:0,"border-top":c.em(b.THICKNESS)+" solid","border-left":0},"mjx-menclose > mjx-arrow[double] > mjx-aline":{left:c.em(b.THICKNESS*(b.ARROWX-1)),height:0},"mjx-menclose > mjx-arrow > mjx-rthead":{transform:"skewX("+u+"rad)",right:0,bottom:"-1px","border-bottom":"1px solid transparent","border-top":c.em(b.THICKNESS*b.ARROWY)+" solid transparent"},"mjx-menclose > mjx-arrow > mjx-rbhead":{transform:"skewX(-"+u+"rad)","transform-origin":"top",right:0,top:"-1px","border-top":"1px solid transparent","border-bottom":c.em(b.THICKNESS*b.ARROWY)+" solid transparent"},"mjx-menclose > mjx-arrow > mjx-lthead":{transform:"skewX(-"+u+"rad)",left:0,bottom:"-1px","border-left":0,"border-right":c.em(b.THICKNESS*b.ARROWX)+" solid","border-bottom":"1px solid transparent","border-top":c.em(b.THICKNESS*b.ARROWY)+" solid transparent"},"mjx-menclose > mjx-arrow > mjx-lbhead":{transform:"skewX("+u+"rad)","transform-origin":"top",left:0,top:"-1px","border-left":0,"border-right":c.em(b.THICKNESS*b.ARROWX)+" solid","border-top":"1px solid transparent","border-bottom":c.em(b.THICKNESS*b.ARROWY)+" solid transparent"},"mjx-menclose > dbox":{position:"absolute",top:0,bottom:0,left:c.em(-1.5*b.PADDING),width:c.em(3*b.PADDING),border:c.em(b.THICKNESS)+" solid","border-radius":"50%","clip-path":"inset(0 0 0 "+c.em(1.5*b.PADDING)+")","box-sizing":"border-box"}},f.notations=new Map([b.Border("top"),b.Border("right"),b.Border("bottom"),b.Border("left"),b.Border2("actuarial","top","right"),b.Border2("madruwb","bottom","right"),b.DiagonalStrike("up",1),b.DiagonalStrike("down",-1),["horizontalstrike",{renderer:b.RenderElement("hstrike","Y"),bbox:function(t){return[0,t.padding,0,t.padding]}}],["verticalstrike",{renderer:b.RenderElement("vstrike","X"),bbox:function(t){return[t.padding,0,t.padding,0]}}],["box",{renderer:function(t,e){t.adaptor.setStyle(e,"border",t.em(t.thickness)+" solid")},bbox:b.fullBBox,border:b.fullBorder,remove:"left right top bottom"}],["roundedbox",{renderer:b.RenderElement("rbox"),bbox:b.fullBBox}],["circle",{renderer:b.RenderElement("cbox"),bbox:b.fullBBox}],["phasorangle",{renderer:function(t,e){var r=t.getBBox(),n=(r.w,r.h),i=r.d,o=m(t.getArgMod(1.75*t.padding,n+i),2),a=o[0],s=o[1],c=t.thickness*Math.sin(a)*.9;t.adaptor.setStyle(e,"border-bottom",t.em(t.thickness)+" solid");var 
l=t.adjustBorder(t.html("mjx-ustrike",{style:{width:t.em(s),transform:"translateX("+t.em(c)+") rotate("+t.fixed(-a)+"rad)"}}));t.adaptor.append(t.chtml,l)},bbox:function(t){var e=t.padding/2,r=t.thickness;return[2*e,e,e+r,3*e+r]},border:function(t){return[0,0,t.thickness,0]},remove:"bottom"}],b.Arrow("up"),b.Arrow("down"),b.Arrow("left"),b.Arrow("right"),b.Arrow("updown"),b.Arrow("leftright"),b.DiagonalArrow("updiagonal"),b.DiagonalArrow("northeast"),b.DiagonalArrow("southeast"),b.DiagonalArrow("northwest"),b.DiagonalArrow("southwest"),b.DiagonalArrow("northeastsouthwest"),b.DiagonalArrow("northwestsoutheast"),["longdiv",{renderer:function(t,e){var r=t.adaptor;r.setStyle(e,"border-top",t.em(t.thickness)+" solid");var n=r.append(t.chtml,t.html("dbox")),i=t.thickness,o=t.padding;i!==b.THICKNESS&&r.setStyle(n,"border-width",t.em(i)),o!==b.PADDING&&(r.setStyle(n,"left",t.em(-1.5*o)),r.setStyle(n,"width",t.em(3*o)),r.setStyle(n,"clip-path","inset(0 0 0 "+t.em(1.5*o)+")"))},bbox:function(t){var e=t.padding,r=t.thickness;return[e+r,e,e,2*e+r/2]}}],["radical",{renderer:function(e,t){e.msqrt.toCHTML(t);var r=e.sqrtTRBL();e.adaptor.setStyle(e.msqrt.chtml,"margin",r.map(function(t){return e.em(-t)}).join(" "))},init:function(t){t.msqrt=t.createMsqrt(t.childNodes[0])},bbox:function(t){return t.sqrtTRBL()},renderChild:!0}]]),f);function f(){return null!==l&&l.apply(this,arguments)||this}e.CHTMLmenclose=h},function(t,e,r){"use strict";var n,i=this&&this.__extends||(n=function(t,e){return(n=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(t,e){t.__proto__=e}||function(t,e){for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r])})(t,e)},function(t,e){function r(){this.constructor=t}n(t,e),t.prototype=null===e?Object.create(e):(r.prototype=e.prototype,new r)}),f=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var c=r(36),l=r(10);e.CommonMencloseMixin=function(t){return i(e,n=t),e.prototype.getParameters=function(){var t=this.node.attributes,e=t.get("data-padding");void 0!==e&&(this.padding=this.length2em(e,c.PADDING));var r=t.get("data-thickness");void 0!==r&&(this.thickness=this.length2em(r,c.THICKNESS));var n=t.get("data-arrowhead");if(void 0!==n){var i=f(l.split(n),3),o=i[0],a=i[1],s=i[2];this.arrowhead={x:o?parseFloat(o):c.ARROWX,y:a?parseFloat(a):c.ARROWY,dx:s?parseFloat(s):c.ARROWDX}}},e.prototype.getNotations=function(){var e,t,r=this.constructor.notations;try{for(var n=h(l.split(this.node.attributes.get("notation"))),i=n.next();!i.done;i=n.next()){var o=i.value,a=r.get(o);a&&(this.notations[o]=a).renderChild&&(this.renderChild=a.renderer)}}catch(t){e={error:t}}finally{try{i&&!i.done&&(t=n.return)&&t.call(n)}finally{if(e)throw e.error}}},e.prototype.removeRedundantNotations=function(){var e,t,r,n;try{for(var i=h(Object.keys(this.notations)),o=i.next();!o.done;o=i.next()){var a=o.value;if(this.notations[a]){var s=this.notations[a].remove||"";try{for(var c=(r=void 0,h(s.split(/ /))),l=c.next();!l.done;l=c.next()){var u=l.value;delete this.notations[u]}}catch(t){r={error:t}}finally{try{l&&!l.done&&(n=c.return)&&n.call(c)}finally{if(r)throw r.error}}}}}catch(t){e={error:t}}finally{try{o&&!o.done&&(t=i.return)&&t.call(i)}finally{if(e)throw e.error}}},e.prototype.initializeNotations=function(){var e,t;try{for(var 
r=h(Object.keys(this.notations)),n=r.next();!n.done;n=r.next()){var i=n.value,o=this.notations[i].init;o&&o(this)}}catch(t){e={error:t}}finally{try{n&&!n.done&&(t=r.return)&&t.call(r)}finally{if(e)throw e.error}}},e.prototype.computeBBox=function(t,e){void 0===e&&(e=!1);var r=f(this.getBBoxExtenders(),4),n=r[0],i=r[1],o=r[2],a=r[3],s=this.childNodes[0].getBBox();t.combine(s,a,0),t.h+=n,t.d+=o,t.w+=i,this.setChildPWidths(e)},e.prototype.getBBoxExtenders=function(){var e,t,r=[0,0,0,0];try{for(var n=h(Object.keys(this.notations)),i=n.next();!i.done;i=n.next()){var o=i.value;this.maximizeEntries(r,this.notations[o].bbox(this))}}catch(t){e={error:t}}finally{try{i&&!i.done&&(t=n.return)&&t.call(n)}finally{if(e)throw e.error}}return r},e.prototype.getPadding=function(){var e,t,r=[0,0,0,0],n=[0,0,0,0];try{for(var i=h(Object.keys(this.notations)),o=i.next();!o.done;o=i.next()){var a=o.value;this.maximizeEntries(r,this.notations[a].bbox(this));var s=this.notations[a].border;s&&this.maximizeEntries(n,s(this))}}catch(t){e={error:t}}finally{try{o&&!o.done&&(t=i.return)&&t.call(i)}finally{if(e)throw e.error}}return[0,1,2,3].map(function(t){return r[t]-n[t]})},e.prototype.maximizeEntries=function(t,e){for(var r=0;r=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var o,a=r(2),s=r(73),c=r(73),u=r(52),h=(o=s.CommonMrowMixin(a.CHTMLWrapper),i(f,o),f.prototype.toCHTML=function(t){var e,r,n=this.node.isInferred?this.chtml=t:this.standardCHTMLnode(t),i=!1;try{for(var o=l(this.childNodes),a=o.next();!a.done;a=o.next()){var s=a.value;s.toCHTML(n),s.bbox.w<0&&(i=!0)}}catch(t){e={error:t}}finally{try{a&&!a.done&&(r=o.return)&&r.call(o)}finally{if(e)throw e.error}}if(i){var c=this.getBBox().w;c&&(this.adaptor.setStyle(n,"width",this.em(Math.max(0,c))),c<0&&this.adaptor.setStyle(n,"marginRight",this.em(c)))}},f.kind=u.MmlMrow.prototype.kind,f);function f(){return null!==o&&o.apply(this,arguments)||this}e.CHTMLmrow=h;var p,d=(p=c.CommonInferredMrowMixin(h),i(m,p),m.kind=u.MmlInferredMrow.prototype.kind,m);function m(){return null!==p&&p.apply(this,arguments)||this}e.CHTMLinferredMrow=d},function(t,e,r){"use strict";var n,i=this&&this.__extends||(n=function(t,e){return(n=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(t,e){t.__proto__=e}||function(t,e){for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r])})(t,e)},function(t,e){function r(){this.constructor=t}n(t,e),t.prototype=null===e?Object.create(e):(r.prototype=e.prototype,new r)});Object.defineProperty(e,"__esModule",{value:!0});var o,a=r(2),s=r(151),c=r(57),l=(o=s.CommonMfencedMixin(a.CHTMLWrapper),i(u,o),u.prototype.toCHTML=function(t){var e=this.standardCHTMLnode(t);this.mrow.toCHTML(e)},u.kind=c.MmlMfenced.prototype.kind,u);function u(){return null!==o&&o.apply(this,arguments)||this}e.CHTMLmfenced=l},function(t,e,r){"use strict";var n,i=this&&this.__extends||(n=function(t,e){return(n=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(t,e){t.__proto__=e}||function(t,e){for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r])})(t,e)},function(t,e){function r(){this.constructor=t}n(t,e),t.prototype=null===e?Object.create(e):(r.prototype=e.prototype,new r)}),o=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator 
is not defined.")};Object.defineProperty(e,"__esModule",{value:!0}),e.CommonMfencedMixin=function(t){return i(e,n=t),e.prototype.createMrow=function(){var t=this.node.factory.create("inferredMrow");t.inheritAttributesFrom(this.node),this.mrow=this.wrap(t),this.mrow.parent=this},e.prototype.addMrowChildren=function(){var e,t,r=this.node,n=this.mrow;this.addMo(r.open),this.childNodes.length&&n.childNodes.push(this.childNodes[0]);var i=0;try{for(var o=c(this.childNodes.slice(1)),a=o.next();!a.done;a=o.next()){var s=a.value;this.addMo(r.separators[i++]),n.childNodes.push(s)}}catch(t){e={error:t}}finally{try{a&&!a.done&&(t=o.return)&&t.call(o)}finally{if(e)throw e.error}}this.addMo(r.close),n.stretchChildren()},e.prototype.addMo=function(t){if(t){var e=this.wrap(t);this.mrow.childNodes.push(e),e.parent=this.mrow}},e.prototype.computeBBox=function(t,e){void 0===e&&(e=!1),t.updateFrom(this.mrow.getBBox()),this.setChildPWidths(e)},e;function e(){for(var t=[],e=0;e *":{"font-size":"2000%"},"mjx-dbox":{display:"block","font-size":"5%"},"mjx-num":{display:"block","text-align":"center"},"mjx-den":{display:"block","text-align":"center"},"mjx-mfrac[bevelled] > mjx-num":{display:"inline-block"},"mjx-mfrac[bevelled] > mjx-den":{display:"inline-block"},'mjx-den[align="right"], mjx-num[align="right"]':{"text-align":"right"},'mjx-den[align="left"], mjx-num[align="left"]':{"text-align":"left"},"mjx-nstrut":{display:"inline-block",height:".054em",width:0,"vertical-align":"-.054em"},'mjx-nstrut[type="d"]':{height:".217em","vertical-align":"-.217em"},"mjx-dstrut":{display:"inline-block",height:".505em",width:0},'mjx-dstrut[type="d"]':{height:".726em"},"mjx-line":{display:"block","box-sizing":"border-box","min-height":"1px",height:".06em","border-top":".06em solid",margin:".06em -.1em",overflow:"hidden"},'mjx-line[type="d"]':{margin:".18em -.1em"}},u);function u(){return null!==o&&o.apply(this,arguments)||this}e.CHTMLmfrac=l},function(t,e,r){"use strict";var n,i=this&&this.__extends||(n=function(t,e){return(n=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(t,e){t.__proto__=e}||function(t,e){for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r])})(t,e)},function(t,e){function r(){this.constructor=t}n(t,e),t.prototype=null===e?Object.create(e):(r.prototype=e.prototype,new r)}),l=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0this.surdH?(t.h+t.d-(this.surdH-e))/2:e+r/4]},e.prototype.getRootDimens=function(t){return[0,0,0,0]},e;function e(){for(var t=[],e=0;e=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var a,s=r(2),c=r(158),l=(a=c.CommonScriptbaseMixin(s.CHTMLWrapper),i(h,a),h.prototype.toCHTML=function(t){this.chtml=this.standardCHTMLnode(t);var e=o(this.getOffset(this.baseChild.getBBox(),this.script.getBBox()),2),r=e[0],n=e[1],i={"vertical-align":this.em(n)};r&&(i["margin-left"]=this.em(r)),this.baseChild.toCHTML(this.chtml),this.script.toCHTML(this.adaptor.append(this.chtml,this.html("mjx-script",{style:i})))},h.prototype.setDeltaW=function(t,e){for(var r=0;r=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var a=1.5;e.CommonScriptbaseMixin=function(t){var e,i;return 
o(r,i=t),Object.defineProperty(r.prototype,"baseChild",{get:function(){return this.childNodes[this.node.base]},enumerable:!0,configurable:!0}),Object.defineProperty(r.prototype,"script",{get:function(){return this.childNodes[1]},enumerable:!0,configurable:!0}),r.prototype.computeBBox=function(t,e){void 0===e&&(e=!1);var r=this.baseChild.getBBox(),n=this.script.getBBox(),i=s(this.getOffset(r,n),2),o=i[0],a=i[1];t.append(r),t.combine(n,t.w+o,a),t.w+=this.font.params.scriptspace,t.clean(),this.setChildPWidths(e)},r.prototype.coreIC=function(){var t=this.baseCore.getBBox();return t.ic?1.2*t.ic+.05:0},r.prototype.isCharBase=function(){var t=this.baseChild;return(t.node.isKind("mstyle")||t.node.isKind("mrow"))&&1===t.childNodes.length&&(t=t.childNodes[0]),(t.node.isKind("mo")||t.node.isKind("mi")||t.node.isKind("mn"))&&1===t.bbox.rscale&&1===t.getText().length&&!t.node.attributes.get("largeop")},r.prototype.getOffset=function(t,e){return[0,0]},r.prototype.getV=function(t,e){var r=this.font.params,n=this.length2em(this.node.attributes.get("subscriptshift"),r.sub1);return Math.max(this.isCharBase()?0:t.d+r.sub_drop*e.rscale,n,e.h*e.rscale-.8*r.x_height)},r.prototype.getU=function(t,e){var r=this.font.params,n=this.node.attributes.getList("displaystyle","texprimestyle","superscriptshift"),i=n.displaystyle?r.sup1:n.texprimestyle?r.sup3:r.sup2,o=this.length2em(n.superscriptshift,i);return Math.max(this.isCharBase()?0:t.h-r.sup_drop*e.rscale,o,e.d*e.rscale+.25*r.x_height)},r.prototype.hasMovableLimits=function(){return!this.node.attributes.get("displaystyle")&&(this.node.getProperty("movablelimits")||this.node.attributes.get("movablelimits")||this.baseChild.coreMO().node.attributes.get("movablelimits"))},r.prototype.getOverKU=function(t,e){var r=this.node.attributes.get("accent"),n=this.font.params,i=e.d*e.rscale,o=(r?n.rule_thickness:Math.max(n.big_op_spacing1,n.big_op_spacing3-Math.max(0,i)))-(this.baseChild.node.isKind("munderover")?.1:0);return[o,t.h*t.rscale+o+i]},r.prototype.getUnderKV=function(t,e){var r=this.node.attributes.get("accentunder"),n=this.font.params,i=e.h*e.rscale,o=(r?n.rule_thickness:Math.max(n.big_op_spacing2,n.big_op_spacing4-i))-(this.baseChild.node.isKind("munderover")?.1:0);return[o,-(t.d*t.rscale+o+i)]},r.prototype.getDeltaW=function(t,e){var r,n,i,o;void 0===e&&(e=[0,0,0]);var a=this.node.attributes.get("align"),s=t.map(function(t){return t.w*t.rscale}),c=Math.max.apply(Math,y(s)),l=[],u=0;try{for(var h=x(s.keys()),f=h.next();!f.done;f=h.next())l[m=f.value]=("center"===a?(c-s[m])/2:"right"===a?c-s[m]:0)+e[m],l[m] mjx-row":{"text-align":"left"},"mjx-under":{"padding-bottom":".1em"}},f);function f(){return null!==c&&c.apply(this,arguments)||this}e.CHTMLmunder=h;var d,m=(d=s.CommonMoverMixin(o.CHTMLmsup),i(y,d),y.prototype.toCHTML=function(t){if(this.hasMovableLimits())return d.prototype.toCHTML.call(this,t),void this.adaptor.setAttribute(this.chtml,"limits","false");this.chtml=this.standardCHTMLnode(t);var e=this.adaptor.append(this.chtml,this.html("mjx-over")),r=this.adaptor.append(this.chtml,this.html("mjx-base"));this.script.toCHTML(e),this.baseChild.toCHTML(r);var n=this.script.getBBox(),i=this.baseChild.getBBox(),o=p(this.getOverKU(i,n),2),a=o[0],s=(o[1],this.getDelta());this.adaptor.setStyle(e,"paddingBottom",this.em(a)),this.setDeltaW([r,e],this.getDeltaW([i,n],[0,s])),this.adjustOverDepth(e,n)},y.kind=u.MmlMover.prototype.kind,y.useIC=!0,y.styles={'mjx-mover:not([limits="false"])':{"padding-top":".1em"},'mjx-mover:not([limits="false"]) > 
*':{display:"block","text-align":"left"}},y);function y(){return null!==d&&d.apply(this,arguments)||this}e.CHTMLmover=m;var v,b=(v=l.CommonMunderoverMixin(o.CHTMLmsubsup),i(g,v),g.prototype.toCHTML=function(t){if(this.hasMovableLimits())return v.prototype.toCHTML.call(this,t),void this.adaptor.setAttribute(this.chtml,"limits","false");this.chtml=this.standardCHTMLnode(t);var e=this.adaptor.append(this.chtml,this.html("mjx-over")),r=this.adaptor.append(this.adaptor.append(this.chtml,this.html("mjx-box")),this.html("mjx-munder")),n=this.adaptor.append(this.adaptor.append(r,this.html("mjx-row")),this.html("mjx-base")),i=this.adaptor.append(this.adaptor.append(r,this.html("mjx-row")),this.html("mjx-under"));this.overChild.toCHTML(e),this.baseChild.toCHTML(n),this.underChild.toCHTML(i);var o=this.overChild.getBBox(),a=this.baseChild.getBBox(),s=this.underChild.getBBox(),c=p(this.getOverKU(a,o),2),l=c[0],u=(c[1],p(this.getUnderKV(a,s),2)),h=u[0],f=(u[1],this.getDelta());this.adaptor.setStyle(e,"paddingBottom",this.em(l)),this.adaptor.setStyle(i,"paddingTop",this.em(h)),this.setDeltaW([n,i,e],this.getDeltaW([a,s,o],[0,-f,f])),this.adjustOverDepth(e,o),this.adjustUnderDepth(i,s)},g.kind=u.MmlMunderover.prototype.kind,g.useIC=!0,g.styles={'mjx-munderover:not([limits="false"])':{"padding-top":".1em"},'mjx-munderover:not([limits="false"]) > *':{display:"block"}},g);function g(){return null!==v&&v.apply(this,arguments)||this}e.CHTMLmunderover=b},function(t,e,r){"use strict";var n,i=this&&this.__extends||(n=function(t,e){return(n=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(t,e){t.__proto__=e}||function(t,e){for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r])})(t,e)},function(t,e){function r(){this.constructor=t}n(t,e),t.prototype=null===e?Object.create(e):(r.prototype=e.prototype,new r)}),c=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0 mjx-row > mjx-cell":{"text-align":"right"}},h);function h(){return null!==o&&o.apply(this,arguments)||this}e.CHTMLmmultiscripts=u},function(t,s,e){"use strict";var n,r=this&&this.__extends||(n=function(t,e){return(n=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(t,e){t.__proto__=e}||function(t,e){for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r])})(t,e)},function(t,e){function r(){this.constructor=t}n(t,e),t.prototype=null===e?Object.create(e):(r.prototype=e.prototype,new r)}),d=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(s,"__esModule",{value:!0});var i=e(16);s.NextScript={base:"subList",subList:"supList",supList:"subList",psubList:"psupList",psupList:"psubList"},s.ScriptNames=["sup","sup","psup","psub"],s.CommonMmultiscriptsMixin=function(t){return r(e,c=t),e.prototype.combinePrePost=function(t,e){var r=new i.BBox(t);return r.combine(e,0,0),r},e.prototype.computeBBox=function(t,e){void 0===e&&(e=!1);var r=this.font.params.scriptspace,n=this.getScriptData(),i=this.combinePrePost(n.sub,n.psub),o=this.combinePrePost(n.sup,n.psup),a=d(this.getUVQ(n.base,i,o),2),s=a[0],c=a[1];if(t.empty(),n.numPrescripts&&(t.combine(n.psup,r,s),t.combine(n.psub,r,c)),t.append(n.base),n.numScripts){var 
l=t.w;t.combine(n.sup,l,s),t.combine(n.sub,l,c),t.w+=r}t.clean(),this.setChildPWidths(e)},e.prototype.getScriptData=function(){if(this.scriptData)return this.scriptData;var t=this.scriptData={base:null,sub:i.BBox.empty(),sup:i.BBox.empty(),psub:i.BBox.empty(),psup:i.BBox.empty(),numPrescripts:0,numScripts:0},e=this.getScriptBBoxLists();return this.combineBBoxLists(t.sub,t.sup,e.subList,e.supList),this.combineBBoxLists(t.psub,t.psup,e.psubList,e.psupList),this.scriptData.base=e.base[0],this.scriptData.numPrescripts=e.psubList.length,this.scriptData.numScripts=e.subList.length,this.scriptData},e.prototype.getScriptBBoxLists=function(){var e,t,r={base:[],subList:[],supList:[],psubList:[],psupList:[]},n="base";try{for(var i=l(this.childNodes),o=i.next();!o.done;o=i.next()){var a=o.value;n=a.node.isKind("mprescripts")?"psubList":(r[n].push(a.getBBox()),s.NextScript[n])}}catch(t){e={error:t}}finally{try{o&&!o.done&&(t=i.return)&&t.call(i)}finally{if(e)throw e.error}}return this.firstPrescript=r.subList.length+r.supList.length+2,this.padLists(r.subList,r.supList),this.padLists(r.psubList,r.psupList),r},e.prototype.padLists=function(t,e){t.length>e.length&&e.push(i.BBox.empty())},e.prototype.combineBBoxLists=function(t,e,r,n){for(var i=0;it.h&&(t.h=s),c>t.d&&(t.d=c),h>e.h&&(e.h=h),f>e.d&&(e.d=f)}},e.prototype.getScaledWHD=function(t){var e=t.w,r=t.h,n=t.d,i=t.rscale;return[e*i,r*i,n*i]},e.prototype.getUVQ=function(t,e,r){var n;if(!this.UVQ){var i=d([0,0,0],3),o=i[0],a=i[1],s=i[2];0===e.h&&0===e.d?o=this.getU(t,r):0===r.h&&0===r.d?o=-this.getV(t,e):(o=(n=d(c.prototype.getUVQ.call(this,t,e,r),3))[0],a=n[1],s=n[2]),this.UVQ=[o,a,s]}return this.UVQ},e;function e(){var t=null!==c&&c.apply(this,arguments)||this;return t.scriptData=null,t.firstPrescript=0,t}var c}},function(t,e,r){"use strict";var n,i=this&&this.__extends||(n=function(t,e){return(n=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(t,e){t.__proto__=e}||function(t,e){for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r])})(t,e)},function(t,e){function r(){this.constructor=t}n(t,e),t.prototype=null===e?Object.create(e):(r.prototype=e.prototype,new r)}),y=this&&this.__values||function(t){var e="function"==typeof Symbol&&Symbol.iterator,r=e&&t[e],n=0;if(r)return r.call(t);if(t&&"number"==typeof t.length)return{next:function(){return t&&n>=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")},u=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0 mjx-itable":{"vertical-align":"middle","text-align":"left","box-sizing":"border-box"},"mjx-labels > mjx-itable":{position:"absolute",top:0},'mjx-mtable[justify="left"]':{"text-align":"left"},'mjx-mtable[justify="right"]':{"text-align":"right"},'mjx-mtable[justify="left"][side="left"]':{"padding-right":"0 ! important"},'mjx-mtable[justify="left"][side="right"]':{"padding-left":"0 ! important"},'mjx-mtable[justify="right"][side="left"]':{"padding-right":"0 ! important"},'mjx-mtable[justify="right"][side="right"]':{"padding-left":"0 ! 
important"},"mjx-mtable[align]":{"vertical-align":"baseline"},'mjx-mtable[align="top"] > mjx-table':{"vertical-align":"top"},'mjx-mtable[align="bottom"] > mjx-table':{"vertical-align":"bottom"},'mjx-mtable[align="center"] > mjx-table':{"vertical-align":"middle"},'mjx-mtable[align="baseline"] > mjx-table':{"vertical-align":"middle"}},f);function f(t,e,r){void 0===r&&(r=null);var n=o.call(this,t,e,r)||this;return n.itable=n.html("mjx-itable"),n.labels=n.html("mjx-itable"),n}e.CHTMLmtable=l},function(t,e,r){"use strict";var n,o=this&&this.__extends||(n=function(t,e){return(n=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(t,e){t.__proto__=e}||function(t,e){for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r])})(t,e)},function(t,e){function r(){this.constructor=t}n(t,e),t.prototype=null===e?Object.create(e):(r.prototype=e.prototype,new r)}),y=this&&this.__read||function(t,e){var r="function"==typeof Symbol&&t[Symbol.iterator];if(!r)return t;var n,i,o=r.call(t),a=[];try{for(;(void 0===e||0=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var s=r(16),v=r(10),b=r(104);e.CommonMtableMixin=function(t){return o(e,i=t),Object.defineProperty(e.prototype,"tableRows",{get:function(){return this.childNodes},enumerable:!0,configurable:!0}),e.prototype.findContainer=function(){for(var t=this,e=t.parent;e&&(e.node.notParent||e.node.isKind("mrow"));)e=(t=e).parent;this.container=e,this.containerI=t.node.childPosition()},e.prototype.getPercentageWidth=function(){if(this.hasLabels)this.bbox.pwidth=s.BBox.fullWidth;else{var t=this.node.attributes.get("width");v.isPercent(t)&&(this.bbox.pwidth=t)}},e.prototype.stretchRows=function(){for(var t=this.node.attributes.get("equalrows"),e=t?this.getEqualRowHeight():0,r=t?this.getTableData():{H:[0],D:[0]},n=r.H,i=r.D,o=this.tableRows,a=0;an[r]&&(n[r]=s),c>i[r]&&(i[r]=c),o&&l>o[e]&&(o[e]=l)},e.prototype.recordPWidthCell=function(t,e){t.childNodes[0]&&t.childNodes[0].getBBox().pwidth&&this.pwidthCells.push([t,e])},e.prototype.computeBBox=function(t,e){void 0===e&&(e=!1);var r,n,i=this.getTableData(),o=i.H,a=i.D;if(this.node.attributes.get("equalrows")){var s=this.getEqualRowHeight();r=b.sum([].concat(this.rLines,this.rSpace))+s*this.numRows}else r=b.sum(o.concat(a,this.rLines,this.rSpace));r+=2*(this.fLine+this.fSpace[1]);var c=this.getComputedWidths();n=b.sum(c.concat(this.cLines,this.cSpace))+2*(this.fLine+this.fSpace[0]);var l=this.node.attributes.get("width");"auto"!==l&&(n=Math.max(this.length2em(l,0)+2*this.fLine,n));var u=y(this.getBBoxHD(r),2),h=u[0],f=u[1];t.h=h,t.d=f,t.w=n;var p=y(this.getBBoxLR(),2),d=p[0],m=p[1];t.L=d,t.R=m,v.isPercent(l)||this.setColumnPWidths()},e.prototype.setChildPWidths=function(t,e,r){var n=this.node.attributes.get("width");if(v.isPercent(n)){this.hasLabels||(this.bbox.pwidth="",this.container.bbox.pwidth="");var i=this.bbox,o=i.w,a=i.L,s=i.R,c=Math.max(o,this.length2em(n,Math.max(e,a+o+s))),l=this.node.attributes.get("equalcolumns")?Array(this.numCols).fill(this.percent(1/Math.max(1,this.numCols))):this.getColumnAttributes("columnwidth",0);this.cWidths=this.getColumnWidthsFixed(l,c);var u=this.getComputedWidths();return this.pWidth=b.sum(u.concat(this.cLines,this.cSpace))+2*(this.fLine+this.fSpace[0]),this.isTop&&(this.bbox.w=this.pWidth),this.setColumnPWidths(),this.pWidth!==o&&this.parent.invalidateBBox(),this.pWidth!==o}},e.prototype.setColumnPWidths=function(){var 
[Minified MathJax v3 bundle elided: the remainder of this hunk is machine-generated JavaScript comprising the CHTML output wrappers (mtable, mtr, mlabeledtr, mtd, maction, mglyph, and text nodes) and the TeX font metric tables for the bold, double-struck, fraktur, fraktur-bold, italic, large-operator, monospace, and normal variants. The text as captured is not valid JavaScript: every span between a "<" and a ">" was stripped as markup during extraction, so loop bounds, comparisons, and whole class bodies are missing.]
",f:"BI"},120622:{c:"\\3A3",f:"BI"},120623:{c:"T",f:"BI"},120624:{c:"\\3A5",f:"BI"},120625:{c:"\\3A6",f:"BI"},120626:{c:"X",f:"BI"},120627:{c:"\\3A8",f:"BI"},120628:{c:"\\3A9",f:"BI"},120629:{c:"\\2207",f:""},120630:{c:"\\3B1",f:"BI"},120631:{c:"\\3B2",f:"BI"},120632:{c:"\\3B3",f:"BI"},120633:{c:"\\3B4",f:"BI"},120634:{c:"\\3B5",f:"BI"},120635:{c:"\\3B6",f:"BI"},120636:{c:"\\3B7",f:"BI"},120637:{c:"\\3B8",f:"BI"},120638:{c:"\\3B9",f:"BI"},120639:{c:"\\3BA",f:"BI"},120640:{c:"\\3BB",f:"BI"},120641:{c:"\\3BC",f:"BI"},120642:{c:"\\3BD",f:"BI"},120643:{c:"\\3BE",f:"BI"},120644:{c:"\\3BF",f:"BI"},120645:{c:"\\3C0",f:"BI"},120646:{c:"\\3C1",f:"BI"},120647:{c:"\\3C2",f:"BI"},120648:{c:"\\3C3",f:"BI"},120649:{c:"\\3C4",f:"BI"},120650:{c:"\\3C5",f:"BI"},120651:{c:"\\3C6",f:"BI"},120652:{c:"\\3C7",f:"BI"},120653:{c:"\\3C8",f:"BI"},120654:{c:"\\3C9",f:"BI"},120655:{c:"\\2202",f:""},120656:{c:"\\3F5",f:"BI"},120657:{c:"\\3D1",f:"BI"},120658:{c:"\\E009",f:"A"},120659:{c:"\\3D5",f:"BI"},120660:{c:"\\3F1",f:"BI"},120661:{c:"\\3D6",f:"BI"},120662:{c:"A",f:"SS-B"},120663:{c:"B",f:"SS-B"},120664:{c:"\\393",f:"SS-B"},120665:{c:"\\394",f:"SS-B"},120666:{c:"E",f:"SS-B"},120667:{c:"Z",f:"SS-B"},120668:{c:"H",f:"SS-B"},120669:{c:"\\398",f:"SS-B"},120670:{c:"I",f:"SS-B"},120671:{c:"K",f:"SS-B"},120672:{c:"\\39B",f:"SS-B"},120673:{c:"M",f:"SS-B"},120674:{c:"N",f:"SS-B"},120675:{c:"\\39E",f:"SS-B"},120676:{c:"O",f:"SS-B"},120677:{c:"\\3A0",f:"SS-B"},120678:{c:"P",f:"SS-B"},120679:{c:"\\398",f:"SS-B"},120680:{c:"\\3A3",f:"SS-B"},120681:{c:"T",f:"SS-B"},120682:{c:"\\3A5",f:"SS-B"},120683:{c:"\\3A6",f:"SS-B"},120684:{c:"X",f:"SS-B"},120685:{c:"\\3A8",f:"SS-B"},120686:{c:"\\3A9",f:"SS-B"},120687:{c:"\\2207",f:""},120688:{c:"\\3B1",f:"BI"},120689:{c:"\\3B2",f:"BI"},120690:{c:"\\3B3",f:"BI"},120691:{c:"\\3B4",f:"BI"},120692:{c:"\\3B5",f:"BI"},120693:{c:"\\3B6",f:"BI"},120694:{c:"\\3B7",f:"BI"},120695:{c:"\\3B8",f:"BI"},120696:{c:"\\3B9",f:"BI"},120697:{c:"\\3BA",f:"BI"},120698:{c:"\\3BB",f:"BI"},120699:{c:"\\3BC",f:"BI"},120700:{c:"\\3BD",f:"BI"},120701:{c:"\\3BE",f:"BI"},120702:{c:"\\3BF",f:"BI"},120703:{c:"\\3C0",f:"BI"},120704:{c:"\\3C1",f:"BI"},120705:{c:"\\3C2",f:"BI"},120706:{c:"\\3C3",f:"BI"},120707:{c:"\\3C4",f:"BI"},120708:{c:"\\3C5",f:"BI"},120709:{c:"\\3C6",f:"BI"},120710:{c:"\\3C7",f:"BI"},120711:{c:"\\3C8",f:"BI"},120712:{c:"\\3C9",f:"BI"},120713:{c:"\\2202",f:""},120714:{c:"\\3F5",f:"BI"},120715:{c:"\\3D1",f:"BI"},120716:{c:"\\E009",f:"A"},120717:{c:"\\3D5",f:"BI"},120718:{c:"\\3F1",f:"BI"},120719:{c:"\\3D6",f:"BI"},120720:{c:"A",f:"SS-I"},120721:{c:"B",f:"SS-I"},120722:{c:"\\393",f:"SS-I"},120723:{c:"\\394",f:"SS-I"},120724:{c:"E",f:"SS-I"},120725:{c:"Z",f:"SS-I"},120726:{c:"H",f:"SS-I"},120727:{c:"\\398",f:"SS-I"},120728:{c:"I",f:"SS-I"},120729:{c:"K",f:"SS-I"},120730:{c:"\\39B",f:"SS-I"},120731:{c:"M",f:"SS-I"},120732:{c:"N",f:"SS-I"},120733:{c:"\\39E",f:"SS-I"},120734:{c:"O",f:"SS-I"},120735:{c:"\\3A0",f:"SS-I"},120736:{c:"P",f:"SS-I"},120737:{c:"\\398",f:"SS-I"},120738:{c:"\\3A3",f:"SS-I"},120739:{c:"T",f:"SS-I"},120740:{c:"\\3A5",f:"SS-I"},120741:{c:"\\3A6",f:"SS-I"},120742:{c:"X",f:"SS-I"},120743:{c:"\\3A8",f:"SS-I"},120744:{c:"\\3A9",f:"SS-I"},120745:{c:"\\2207",f:""},120746:{c:"\\3B1",f:"BI"},120747:{c:"\\3B2",f:"BI"},120748:{c:"\\3B3",f:"BI"},120749:{c:"\\3B4",f:"BI"},120750:{c:"\\3B5",f:"BI"},120751:{c:"\\3B6",f:"BI"},120752:{c:"\\3B7",f:"BI"},120753:{c:"\\3B8",f:"BI"},120754:{c:"\\3B9",f:"BI"},120755:{c:"\\3BA",f:"BI"},120756:{c:"\\3BB",f:"BI"},120757:{c:"\\3BC",f:"BI"},120758:{c:"\\3BD",f:"BI"},
120759:{c:"\\3BE",f:"BI"},120760:{c:"\\3BF",f:"BI"},120761:{c:"\\3C0",f:"BI"},120762:{c:"\\3C1",f:"BI"},120763:{c:"\\3C2",f:"BI"},120764:{c:"\\3C3",f:"BI"},120765:{c:"\\3C4",f:"BI"},120766:{c:"\\3C5",f:"BI"},120767:{c:"\\3C6",f:"BI"},120768:{c:"\\3C7",f:"BI"},120769:{c:"\\3C8",f:"BI"},120770:{c:"\\3C9",f:"BI"},120771:{c:"\\2202",f:""},120772:{c:"\\3F5",f:"BI"},120773:{c:"\\3D1",f:"BI"},120774:{c:"\\E009",f:"A"},120775:{c:"\\3D5",f:"BI"},120776:{c:"\\3F1",f:"BI"},120777:{c:"\\3D6",f:"BI"},120778:{c:"F",f:"I"},120779:{c:"\\3DD",f:"A"},120782:{c:"0",f:"B"},120783:{c:"1",f:"B"},120784:{c:"2",f:"B"},120785:{c:"3",f:"B"},120786:{c:"4",f:"B"},120787:{c:"5",f:"B"},120788:{c:"6",f:"B"},120789:{c:"7",f:"B"},120790:{c:"8",f:"B"},120791:{c:"9",f:"B"},120792:{c:"0",f:"B"},120793:{c:"1",f:"B"},120794:{c:"2",f:"B"},120795:{c:"3",f:"B"},120796:{c:"4",f:"B"},120797:{c:"5",f:"B"},120798:{c:"6",f:"B"},120799:{c:"7",f:"B"},120800:{c:"8",f:"B"},120801:{c:"9",f:"B"},120802:{c:"0",f:"SS"},120803:{c:"1",f:"SS"},120804:{c:"2",f:"SS"},120805:{c:"3",f:"SS"},120806:{c:"4",f:"SS"},120807:{c:"5",f:"SS"},120808:{c:"6",f:"SS"},120809:{c:"7",f:"SS"},120810:{c:"8",f:"SS"},120811:{c:"9",f:"SS"},120812:{c:"0",f:"SS-B"},120813:{c:"1",f:"SS-B"},120814:{c:"2",f:"SS-B"},120815:{c:"3",f:"SS-B"},120816:{c:"4",f:"SS-B"},120817:{c:"5",f:"SS-B"},120818:{c:"6",f:"SS-B"},120819:{c:"7",f:"SS-B"},120820:{c:"8",f:"SS-B"},120821:{c:"9",f:"SS-B"},120822:{c:"0",f:"T"},120823:{c:"1",f:"T"},120824:{c:"2",f:"T"},120825:{c:"3",f:"T"},120826:{c:"4",f:"T"},120827:{c:"5",f:"T"},120828:{c:"6",f:"T"},120829:{c:"7",f:"T"},120830:{c:"8",f:"T"},120831:{c:"9",f:"T"}})},function(t,e,r){"use strict";Object.defineProperty(e,"__esModule",{value:!0}),e.normal={32:[0,0,.25],33:[.716,0,.278],34:[.694,-.379,.5],35:[.694,.194,.833],36:[.75,.056,.5],37:[.75,.056,.833],38:[.716,.022,.778],39:[.694,-.379,.278],40:[.75,.25,.389],41:[.75,.25,.389],42:[.75,-.32,.5],43:[.583,.082,.778],44:[.121,.194,.278],45:[.252,-.179,.333],46:[.12,0,.278],47:[.75,.25,.5],48:[.666,.022,.5],49:[.666,0,.5],50:[.666,0,.5],51:[.665,.022,.5],52:[.677,0,.5],53:[.666,.022,.5],54:[.666,.022,.5],55:[.676,.022,.5],56:[.666,.022,.5],57:[.666,.022,.5],58:[.43,0,.278],59:[.43,.194,.278],60:[.54,.04,.778],61:[.583,.082,.778],62:[.54,.04,.778],63:[.705,0,.472],64:[.705,.011,.778],65:[.716,0,.75],66:[.683,0,.708],67:[.705,.021,.722],68:[.683,0,.764],69:[.68,0,.681],70:[.68,0,.653],71:[.705,.022,.785],72:[.683,0,.75],73:[.683,0,.361],74:[.683,.022,.514],75:[.683,0,.778],76:[.683,0,.625],77:[.683,0,.917],78:[.683,0,.75],79:[.705,.022,.778],80:[.683,0,.681],81:[.705,.193,.778],82:[.683,.022,.736],83:[.705,.022,.556],84:[.677,0,.722],85:[.683,.022,.75],86:[.683,.022,.75],87:[.683,.022,1.028],88:[.683,0,.75],89:[.683,0,.75],90:[.683,0,.611],91:[.75,.25,.278],92:[.75,.25,.5],93:[.75,.25,.278],94:[.694,-.531,.5],95:[-.025,.062,.5],96:[.699,-.505,.5],97:[.448,.011,.5],98:[.694,.011,.556],99:[.448,.011,.444],100:[.694,.011,.556],101:[.448,.011,.444],102:[.705,0,.306,{ic:.066}],103:[.453,.206,.5],104:[.694,0,.556],105:[.669,0,.278],106:[.669,.205,.306],107:[.694,0,.528],108:[.694,0,.278],109:[.442,0,.833],110:[.442,0,.556],111:[.448,.01,.5],112:[.442,.194,.556],113:[.442,.194,.528],114:[.442,0,.392],115:[.448,.011,.394],116:[.615,.01,.389],117:[.442,.011,.556],118:[.431,.011,.528],119:[.431,.011,.722],120:[.431,0,.528],121:[.431,.204,.528],122:[.431,0,.444],123:[.75,.25,.5],124:[.75,.249,.278],125:[.75,.25,.5],126:[.318,-.215,.5],160:[0,0,.25],163:[.714,.011,.769],165:[.683,0,.75],168:[.669,-.554,.5],172:[.356,
-.089,.667],174:[.709,.175,.947],175:[.59,-.544,.5],176:[.715,-.542,.5],177:[.666,0,.778],180:[.699,-.505,.5],183:[.31,-.19,.278],215:[.491,-.009,.778],240:[.749,.021,.556],247:[.537,.036,.778],295:[.695,.013,.54],305:[.442,0,.278,{sk:.0278}],567:[.442,.205,.306,{sk:.0833}],697:[.56,-.043,.275],710:[.694,-.531,.5],711:[.644,-.513,.5],713:[.59,-.544,.5],714:[.699,-.505,.5],715:[.699,-.505,.5],728:[.694,-.515,.5],729:[.669,-.549,.5],730:[.715,-.542,.5],732:[.668,-.565,.5],768:[.699,-.505,0],769:[.699,-.505,0],770:[.694,-.531,0],771:[.668,-.565,0],772:[.59,-.544,0],774:[.694,-.515,0],775:[.669,-.549,0],776:[.669,-.554,0],778:[.715,-.542,0],779:[.701,-.51,0],780:[.644,-.513,0],824:[.716,.215,0],913:[.716,0,.75],914:[.683,0,.708],915:[.68,0,.625],916:[.716,0,.833],917:[.68,0,.681],918:[.683,0,.611],919:[.683,0,.75],920:[.705,.022,.778],921:[.683,0,.361],922:[.683,0,.778],923:[.716,0,.694],924:[.683,0,.917],925:[.683,0,.75],926:[.677,0,.667],927:[.705,.022,.778],928:[.68,0,.75],929:[.683,0,.681],930:[.705,.022,.778],931:[.683,0,.722],932:[.677,0,.722],933:[.705,0,.778],934:[.683,0,.722],935:[.683,0,.75],936:[.683,0,.778],937:[.704,0,.722],978:[.705,0,.778],988:[.68,0,.653],989:[.605,.085,.778],1008:[.434,.006,.667,{ic:.067}],1014:[.44,0,.429],8192:[0,0,.5],8193:[0,0,1],8194:[0,0,.5],8195:[0,0,1],8196:[0,0,.333],8197:[0,0,.25],8198:[0,0,.167],8201:[0,0,.167],8202:[0,0,.1],8203:[0,0,0],8204:[0,0,0],8211:[.285,-.248,.5],8212:[.285,-.248,1],8213:[.285,-.248,1],8214:[.75,.25,.5],8215:[-.025,.062,.5],8216:[.694,-.379,.278],8217:[.694,-.379,.278],8220:[.694,-.379,.5],8221:[.694,-.379,.5],8224:[.705,.216,.444],8225:[.705,.205,.444],8226:[.444,-.055,.5],8230:[.12,0,1.172],8242:[.56,-.043,.275],8243:[.56,0,.55],8244:[.56,0,.825],8245:[.56,-.043,.275],8246:[.56,0,.55],8247:[.56,0,.825],8254:[.59,-.544,.5],8260:[.75,.25,.5],8279:[.56,0,1.1],8289:[0,0,0],8290:[0,0,0],8291:[0,0,0],8292:[0,0,0],8407:[.714,-.516,.5],8450:[.702,.019,.722],8459:[.717,.036,.969,{ic:.272,sk:.333}],8460:[.666,.133,.72],8461:[.683,0,.778],8462:[.694,.011,.576,{sk:-.0278}],8463:[.695,.013,.54],8464:[.717,.314,1.052,{ic:.081,sk:.417}],8465:[.686,.026,.554],8466:[.717,.017,.874,{ic:.161,sk:.306}],8467:[.705,.02,.417,{sk:.111}],8469:[.683,.02,.722],8472:[.453,.216,.636,{sk:.111}],8473:[.683,0,.611],8474:[.701,.181,.778],8475:[.717,.017,.85,{sk:.194}],8476:[.686,.026,.828],8477:[.683,0,.722],8484:[.683,0,.667],8486:[.704,0,.722],8487:[.684,.022,.722],8488:[.729,.139,.602],8492:[.708,.028,.908,{sk:.194}],8493:[.685,.024,.613],8496:[.707,.008,.562,{ic:.156,sk:.139}],8497:[.735,.036,.895,{ic:.095,sk:.222}],8498:[.695,0,.556],8499:[.721,.05,1.08,{ic:.136,sk:.444}],8501:[.694,0,.611],8502:[.763,.021,.667],8503:[.764,.043,.444],8504:[.764,.043,.667],8513:[.705,.023,.639],8592:[.511,.011,1],8593:[.694,.193,.5],8594:[.511,.011,1],8595:[.694,.194,.5],8596:[.511,.011,1],8597:[.772,.272,.5],8598:[.72,.195,1],8599:[.72,.195,1],8600:[.695,.22,1],8601:[.695,.22,1],8602:[.437,-.06,1],8603:[.437,-.06,1],8606:[.417,-.083,1],8608:[.417,-.083,1],8610:[.417,-.083,1.111],8611:[.417,-.083,1.111],8614:[.511,.011,1],8617:[.511,.011,1.126],8618:[.511,.011,1.126],8619:[.575,.041,1],8620:[.575,.041,1],8621:[.417,-.083,1.389],8622:[.437,-.06,1],8624:[.722,0,.5],8625:[.722,0,.5],8630:[.461,0,1],8631:[.46,0,1],8634:[.65,.083,.778],8635:[.65,.083,.778],8636:[.511,-.23,1],8637:[.27,.011,1],8638:[.694,.194,.417],8639:[.694,.194,.417],8640:[.511,-.23,1],8641:[.27,.011,1],8642:[.694,.194,.417],8643:[.694,.194,.417],8644:[.667,0,1],8646:[.667,0,1],8647:[.583,.083,1],8648:[.6
94,.193,.833],8649:[.583,.083,1],8650:[.694,.194,.833],8651:[.514,.014,1],8652:[.671,.011,1],8653:[.534,.035,1],8654:[.534,.037,1],8655:[.534,.035,1],8656:[.525,.024,1],8657:[.694,.194,.611],8658:[.525,.024,1],8659:[.694,.194,.611],8660:[.526,.025,1],8661:[.772,.272,.611],8666:[.611,.111,1],8667:[.611,.111,1],8669:[.417,-.083,1],8672:[.437,-.064,1.334],8674:[.437,-.064,1.334],8704:[.694,.022,.556],8705:[.846,.021,.5],8706:[.715,.022,.531,{sk:.0833}],8707:[.694,0,.556],8708:[.716,.215,.556],8709:[.772,.078,.5],8710:[.716,0,.833],8711:[.683,.033,.833],8712:[.54,.04,.667],8713:[.716,.215,.667],8715:[.54,.04,.667],8716:[.716,.215,.667],8717:[.44,0,.429],8719:[.75,.25,.944],8720:[.75,.25,.944],8721:[.75,.25,1.056],8722:[.583,.082,.778],8723:[.5,.166,.778],8724:[.766,.093,.778],8725:[.75,.25,.5],8726:[.43,.023,.778],8727:[.465,-.035,.5],8728:[.444,-.055,.5],8729:[.444,-.055,.5],8730:[.8,.2,.833],8733:[.442,.011,.778],8734:[.442,.011,1],8736:[.694,0,.722],8737:[.714,.02,.722],8738:[.551,.051,.722],8739:[.75,.249,.278],8740:[.75,.252,.278],8741:[.75,.25,.5],8742:[.75,.25,.5],8743:[.598,.022,.667],8744:[.598,.022,.667],8745:[.598,.022,.667],8746:[.598,.022,.667],8747:[.716,.216,.417,{ic:.055}],8748:[.805,.306,.819,{ic:.138}],8749:[.805,.306,1.166,{ic:.138}],8750:[.805,.306,.472,{ic:.138}],8756:[.471,.082,.667],8757:[.471,.082,.667],8764:[.367,-.133,.778],8765:[.367,-.133,.778],8768:[.583,.083,.278],8769:[.467,-.032,.778],8770:[.463,-.034,.778],8771:[.464,-.036,.778],8772:[.716,.215,.778],8773:[.589,-.022,.778],8774:[.652,.155,.778],8775:[.652,.155,.778],8776:[.483,-.055,.778],8777:[.716,.215,.778],8778:[.579,.039,.778],8781:[.484,-.016,.778],8782:[.492,-.008,.778],8783:[.492,-.133,.778],8784:[.67,-.133,.778],8785:[.609,.108,.778],8786:[.601,.101,.778],8787:[.601,.102,.778],8790:[.367,-.133,.778],8791:[.721,-.133,.778],8796:[.859,-.133,.778],8800:[.716,.215,.778],8801:[.464,-.036,.778],8802:[.716,.215,.778],8804:[.636,.138,.778],8805:[.636,.138,.778],8806:[.753,.175,.778],8807:[.753,.175,.778],8808:[.752,.286,.778],8809:[.752,.286,.778],8810:[.568,.067,1],8811:[.567,.067,1],8812:[.75,.25,.5],8813:[.716,.215,.778],8814:[.708,.209,.778],8815:[.708,.209,.778],8816:[.801,.303,.778],8817:[.801,.303,.778],8818:[.732,.228,.778],8819:[.732,.228,.778],8820:[.732,.228,.778],8821:[.732,.228,.778],8822:[.681,.253,.778],8823:[.681,.253,.778],8824:[.716,.253,.778],8825:[.716,.253,.778],8826:[.539,.041,.778],8827:[.539,.041,.778],8828:[.58,.153,.778],8829:[.58,.154,.778],8830:[.732,.228,.778],8831:[.732,.228,.778],8832:[.705,.208,.778],8833:[.705,.208,.778],8834:[.54,.04,.778],8835:[.54,.04,.778],8836:[.716,.215,.778],8837:[.716,.215,.778],8838:[.636,.138,.778],8839:[.636,.138,.778],8840:[.801,.303,.778],8841:[.801,.303,.778],8842:[.635,.241,.778],8843:[.635,.241,.778],8846:[.598,.022,.667],8847:[.539,.041,.778],8848:[.539,.041,.778],8849:[.636,.138,.778],8850:[.636,.138,.778],8851:[.598,0,.667],8852:[.598,0,.667],8853:[.583,.083,.778],8854:[.583,.083,.778],8855:[.583,.083,.778],8856:[.583,.083,.778],8857:[.583,.083,.778],8858:[.582,.082,.778],8859:[.582,.082,.778],8861:[.582,.082,.778],8862:[.689,0,.778],8863:[.689,0,.778],8864:[.689,0,.778],8865:[.689,0,.778],8866:[.694,0,.611],8867:[.694,0,.611],8868:[.668,0,.778],8869:[.668,0,.778],8872:[.75,.249,.867],8873:[.694,0,.722],8874:[.694,0,.889],8876:[.695,0,.611],8877:[.695,0,.611],8878:[.695,0,.722],8879:[.695,0,.722],8882:[.539,.041,.778],8883:[.539,.041,.778],8884:[.636,.138,.778],8885:[.636,.138,.778],8888:[.408,-.092,1.111],8890:[.431,.212,.556],8891:[.716,0,.6
11],8892:[.716,0,.611],8896:[.75,.249,.833],8897:[.75,.249,.833],8898:[.75,.249,.833],8899:[.75,.249,.833],8900:[.488,-.012,.5],8901:[.31,-.19,.278],8902:[.486,-.016,.5],8903:[.545,.044,.778],8904:[.505,.005,.9],8905:[.492,-.008,.778],8906:[.492,-.008,.778],8907:[.694,.022,.778],8908:[.694,.022,.778],8909:[.464,-.036,.778],8910:[.578,.021,.76],8911:[.578,.022,.76],8912:[.54,.04,.778],8913:[.54,.04,.778],8914:[.598,.022,.667],8915:[.598,.022,.667],8916:[.736,.022,.667],8918:[.541,.041,.778],8919:[.541,.041,.778],8920:[.568,.067,1.333],8921:[.568,.067,1.333],8922:[.886,.386,.778],8923:[.886,.386,.778],8926:[.734,0,.778],8927:[.734,0,.778],8928:[.801,.303,.778],8929:[.801,.303,.778],8930:[.716,.215,.778],8931:[.716,.215,.778],8934:[.73,.359,.778],8935:[.73,.359,.778],8936:[.73,.359,.778],8937:[.73,.359,.778],8938:[.706,.208,.778],8939:[.706,.208,.778],8940:[.802,.303,.778],8941:[.801,.303,.778],8942:[1.3,.03,.278],8943:[.31,-.19,1.172],8945:[1.52,-.1,1.282],8965:[.716,0,.611],8966:[.813,.097,.611],8968:[.75,.25,.444],8969:[.75,.25,.444],8970:[.75,.25,.444],8971:[.75,.25,.444],8988:[.694,-.306,.5],8989:[.694,-.306,.5],8990:[.366,.022,.5],8991:[.366,.022,.5],8994:[.388,-.122,1],8995:[.378,-.134,1],9001:[.75,.25,.389],9002:[.75,.25,.389],9136:[.744,.244,.412],9137:[.744,.244,.412],9168:[.602,0,.667],9416:[.709,.175,.902],9484:[.694,-.306,.5],9488:[.694,-.306,.5],9492:[.366,.022,.5],9496:[.366,.022,.5],9585:[.694,.195,.889],9586:[.694,.195,.889],9632:[.689,0,.778],9633:[.689,0,.778],9642:[.689,0,.778],9650:[.575,.02,.722],9651:[.716,0,.889],9652:[.575,.02,.722],9653:[.716,0,.889],9654:[.539,.041,.778],9656:[.539,.041,.778],9657:[.505,.005,.5],9660:[.576,.019,.722],9661:[.5,.215,.889],9662:[.576,.019,.722],9663:[.5,.215,.889],9664:[.539,.041,.778],9666:[.539,.041,.778],9667:[.505,.005,.5],9674:[.716,.132,.667],9711:[.715,.215,1],9723:[.689,0,.778],9724:[.689,0,.778],9733:[.694,.111,.944],9824:[.727,.13,.778],9825:[.716,.033,.778],9826:[.727,.162,.778],9827:[.726,.13,.778],9837:[.75,.022,.389],9838:[.734,.223,.389],9839:[.723,.223,.389],10003:[.706,.034,.833],10016:[.716,.022,.833],10072:[.75,.249,.278],10216:[.75,.25,.389],10217:[.75,.25,.389],10222:[.744,.244,.412],10223:[.744,.244,.412],10229:[.511,.011,1.609],10230:[.511,.011,1.638],10231:[.511,.011,1.859],10232:[.525,.024,1.609],10233:[.525,.024,1.638],10234:[.525,.024,1.858],10236:[.511,.011,1.638],10731:[.716,.132,.667],10744:[.716,.215,.778],10752:[.75,.25,1.111],10753:[.75,.25,1.111],10754:[.75,.25,1.111],10756:[.75,.249,.833],10758:[.75,.249,.833],10764:[.805,.306,1.638,{ic:.138}],10799:[.491,-.009,.778],10815:[.683,0,.75],10846:[.813,.097,.611],10877:[.636,.138,.778],10878:[.636,.138,.778],10885:[.762,.29,.778],10886:[.762,.29,.778],10887:[.635,.241,.778],10888:[.635,.241,.778],10889:[.761,.387,.778],10890:[.761,.387,.778],10891:[1.003,.463,.778],10892:[1.003,.463,.778],10901:[.636,.138,.778],10902:[.636,.138,.778],10927:[.636,.138,.778],10928:[.636,.138,.778],10933:[.752,.286,.778],10934:[.752,.286,.778],10935:[.761,.294,.778],10936:[.761,.294,.778],10937:[.761,.337,.778],10938:[.761,.337,.778],10949:[.753,.215,.778],10950:[.753,.215,.778],10955:[.783,.385,.778],10956:[.783,.385,.778],12296:[.75,.25,.389],12297:[.75,.25,.389],57350:[.43,.023,.222],57351:[.431,.024,.389],57352:[.605,.085,.778],57353:[.434,.006,.667,{ic:.067}],57356:[.752,.284,.778],57357:[.752,.284,.778],57358:[.919,.421,.778],57359:[.801,.303,.778],57360:[.801,.303,.778],57361:[.919,.421,.778],57366:[.828,.33,.778],57367:[.752,.332,.778],57368:[.828,.33,.778],57369:[.752
,.333,.778],57370:[.634,.255,.778],57371:[.634,.254,.778],119808:[.698,0,.869],119809:[.686,0,.818],119810:[.697,.011,.831],119811:[.686,0,.882],119812:[.68,0,.756],119813:[.68,0,.724],119814:[.697,.01,.904],119815:[.686,0,.9],119816:[.686,0,.436],119817:[.686,.011,.594],119818:[.686,0,.901],119819:[.686,0,.692],119820:[.686,0,1.092],119821:[.686,0,.9],119822:[.696,.01,.864],119823:[.686,0,.786],119824:[.696,.193,.864],119825:[.686,.011,.862],119826:[.697,.011,.639],119827:[.675,0,.8],119828:[.686,.011,.885],119829:[.686,.007,.869],119830:[.686,.007,1.189],119831:[.686,0,.869],119832:[.686,0,.869],119833:[.686,0,.703],119834:[.453,.006,.559],119835:[.694,.006,.639],119836:[.453,.006,.511],119837:[.694,.006,.639],119838:[.452,.006,.527],119839:[.7,0,.351,{ic:.101}],119840:[.455,.201,.575],119841:[.694,0,.639],119842:[.695,0,.319],119843:[.695,.2,.351],119844:[.694,0,.607],119845:[.694,0,.319],119846:[.45,0,.958],119847:[.45,0,.639],119848:[.452,.005,.575],119849:[.45,.194,.639],119850:[.45,.194,.607],119851:[.45,0,.474],119852:[.453,.006,.454],119853:[.635,.005,.447],119854:[.45,.006,.639],119855:[.444,0,.607],119856:[.444,0,.831],119857:[.444,0,.607],119858:[.444,.2,.607],119859:[.444,0,.511],119860:[.716,0,.75,{sk:.139}],119861:[.683,0,.759,{sk:.0833}],119862:[.705,.022,.715,{sk:.0833}],119863:[.683,0,.828,{sk:.0556}],119864:[.68,0,.738,{sk:.0833}],119865:[.68,0,.643,{ic:.106,sk:.0833}],119866:[.705,.022,.786,{sk:.0833}],119867:[.683,0,.831,{ic:.057,sk:.0556}],119868:[.683,0,.44,{ic:.064,sk:.111}],119869:[.683,.022,.555,{ic:.078,sk:.167}],119870:[.683,0,.849,{sk:.0556}],119871:[.683,0,.681,{sk:.0278}],119872:[.683,0,.97,{ic:.081,sk:.0833}],119873:[.683,0,.803,{ic:.085,sk:.0833}],119874:[.704,.022,.763,{sk:.0833}],119875:[.683,0,.642,{ic:.109,sk:.0833}],119876:[.704,.194,.791,{sk:.0833}],119877:[.683,.021,.759,{sk:.0833}],119878:[.705,.022,.613,{sk:.0833}],119879:[.677,0,.584,{ic:.12,sk:.0833}],119880:[.683,.022,.683,{ic:.084,sk:.0278}],119881:[.683,.022,.583,{ic:.186}],119882:[.683,.022,.944,{ic:.104}],119883:[.683,0,.828,{sk:.0833}],119884:[.683,0,.581,{ic:.182}],119885:[.683,0,.683,{sk:.0833}],119886:[.441,.01,.529],119887:[.694,.011,.429],119888:[.442,.011,.433,{sk:.0556}],119889:[.694,.01,.52,{sk:.167}],119890:[.442,.011,.466,{sk:.0556}],119891:[.705,.205,.49,{ic:.06,sk:.167}],119892:[.442,.205,.477,{sk:.0278}],119893:[.694,.011,.576,{sk:-.0278}],119894:[.661,.011,.345],119895:[.661,.204,.412],119896:[.694,.011,.521],119897:[.694,.011,.298,{sk:.0833}],119898:[.442,.011,.878],119899:[.442,.011,.6],119900:[.441,.011,.485,{sk:.0556}],119901:[.442,.194,.503,{sk:.0833}],119902:[.442,.194,.446,{sk:.0833}],119903:[.442,.011,.451,{sk:.0556}],119904:[.442,.01,.469,{sk:.0556}],119905:[.626,.011,.361,{sk:.0833}],119906:[.442,.011,.572,{sk:.0278}],119907:[.443,.011,.485,{sk:.0278}],119908:[.443,.011,.716,{sk:.0833}],119909:[.442,.011,.572,{sk:.0278}],119910:[.442,.205,.49,{sk:.0556}],119911:[.442,.011,.465,{sk:.0556}],119912:[.711,0,.869,{sk:.16}],119913:[.686,0,.866,{sk:.0958}],119914:[.703,.017,.817,{sk:.0958}],119915:[.686,0,.938,{sk:.0639}],119916:[.68,0,.81,{sk:.0958}],119917:[.68,0,.689,{ic:.12,sk:.0958}],119918:[.703,.016,.887,{sk:.0958}],119919:[.686,0,.982,{sk:.0639}],119920:[.686,0,.511,{ic:.062,sk:.128}],119921:[.686,.017,.631,{ic:.063,sk:.192}],119922:[.686,0,.971,{sk:.0639}],119923:[.686,0,.756,{sk:.0319}],119924:[.686,0,1.142,{ic:.077,sk:.0958}],119925:[.686,0,.95,{ic:.077,sk:.0958}],119926:[.703,.017,.837,{sk:.0958}],119927:[.686,0,.723,{ic:.124,sk:.0958}],119928:[.703,.194,.869,{sk
:.0958}],119929:[.686,.017,.872,{sk:.0958}],119930:[.703,.017,.693,{sk:.0958}],119931:[.675,0,.637,{ic:.135,sk:.0958}],119932:[.686,.016,.8,{ic:.077,sk:.0319}],119933:[.686,.016,.678,{ic:.208}],119934:[.686,.017,1.093,{ic:.114}],119935:[.686,0,.947,{sk:.0958}],119936:[.686,0,.675,{ic:.201}],119937:[.686,0,.773,{sk:.0958}],119938:[.452,.008,.633],119939:[.694,.008,.521],119940:[.451,.008,.513,{sk:.0639}],119941:[.694,.008,.61,{sk:.192}],119942:[.452,.008,.554,{sk:.0639}],119943:[.701,.201,.568,{ic:.056,sk:.192}],119944:[.452,.202,.545,{sk:.0319}],119945:[.694,.008,.668,{sk:-.0319}],119946:[.694,.008,.405],119947:[.694,.202,.471],119948:[.694,.008,.604],119949:[.694,.008,.348,{sk:.0958}],119950:[.452,.008,1.032],119951:[.452,.008,.713],119952:[.452,.008,.585,{sk:.0639}],119953:[.452,.194,.601,{sk:.0958}],119954:[.452,.194,.542,{sk:.0958}],119955:[.452,.008,.529,{sk:.0639}],119956:[.451,.008,.531,{sk:.0639}],119957:[.643,.007,.415,{sk:.0958}],119958:[.452,.008,.681,{sk:.0319}],119959:[.453,.008,.567,{sk:.0319}],119960:[.453,.008,.831,{sk:.0958}],119961:[.452,.008,.659,{sk:.0319}],119962:[.452,.202,.59,{sk:.0639}],119963:[.452,.008,.555,{sk:.0639}],119964:[.717,.008,.803,{ic:.213,sk:.389}],119965:[.708,.028,.908,{sk:.194}],119966:[.728,.026,.666,{ic:.153,sk:.278}],119967:[.708,.031,.774,{ic:.081,sk:.111}],119968:[.707,.008,.562,{ic:.156,sk:.139}],119969:[.735,.036,.895,{ic:.095,sk:.222}],119970:[.717,.037,.61,{ic:.128,sk:.25}],119971:[.717,.036,.969,{ic:.272,sk:.333}],119972:[.717,.017,.809,{ic:.137,sk:.333}],119973:[.717,.314,1.052,{ic:.081,sk:.417}],119974:[.717,.037,.914,{ic:.29,sk:.361}],119975:[.717,.017,.874,{ic:.161,sk:.306}],119976:[.721,.05,1.08,{ic:.136,sk:.444}],119977:[.726,.036,.902,{ic:.306,sk:.389}],119978:[.707,.008,.738,{ic:.067,sk:.167}],119979:[.716,.037,1.013,{sk:.222}],119980:[.717,.017,.883,{sk:.278}],119981:[.717,.017,.85,{sk:.194}],119982:[.708,.036,.868,{ic:.148,sk:.333}],119983:[.735,.037,.747,{ic:.249,sk:.222}],119984:[.717,.017,.8,{ic:.16,sk:.25}],119985:[.717,.017,.622,{ic:.228,sk:.222}],119986:[.717,.017,.805,{ic:.221,sk:.25}],119987:[.717,.017,.944,{ic:.187,sk:.278}],119988:[.716,.017,.71,{ic:.249,sk:.194}],119989:[.717,.016,.821,{ic:.211,sk:.306}],119990:[.441,.01,.529],119991:[.694,.011,.429],119992:[.442,.011,.433,{sk:.0556}],119993:[.694,.01,.52,{sk:.167}],119994:[.442,.011,.466,{sk:.0556}],119995:[.705,.205,.49,{ic:.06,sk:.167}],119996:[.442,.205,.477,{sk:.0278}],119997:[.694,.011,.576,{sk:-.0278}],119998:[.661,.011,.345],119999:[.661,.204,.412],12e4:[.694,.011,.521],120001:[.694,.011,.298,{sk:.0833}],120002:[.442,.011,.878],120003:[.442,.011,.6],120004:[.441,.011,.485,{sk:.0556}],120005:[.442,.194,.503,{sk:.0833}],120006:[.442,.194,.446,{sk:.0833}],120007:[.442,.011,.451,{sk:.0556}],120008:[.442,.01,.469,{sk:.0556}],120009:[.626,.011,.361,{sk:.0833}],120010:[.442,.011,.572,{sk:.0278}],120011:[.443,.011,.485,{sk:.0278}],120012:[.443,.011,.716,{sk:.0833}],120013:[.442,.011,.572,{sk:.0278}],120014:[.442,.205,.49,{sk:.0556}],120015:[.442,.011,.465,{sk:.0556}],120016:[.717,.008,.803,{ic:.213,sk:.389}],120017:[.708,.028,.908,{sk:.194}],120018:[.728,.026,.666,{ic:.153,sk:.278}],120019:[.708,.031,.774,{ic:.081,sk:.111}],120020:[.707,.008,.562,{ic:.156,sk:.139}],120021:[.735,.036,.895,{ic:.095,sk:.222}],120022:[.717,.037,.61,{ic:.128,sk:.25}],120023:[.717,.036,.969,{ic:.272,sk:.333}],120024:[.717,.017,.809,{ic:.137,sk:.333}],120025:[.717,.314,1.052,{ic:.081,sk:.417}],120026:[.717,.037,.914,{ic:.29,sk:.361}],120027:[.717,.017,.874,{ic:.161,sk:.306}],120028:[.721,.05,1
.08,{ic:.136,sk:.444}],120029:[.726,.036,.902,{ic:.306,sk:.389}],120030:[.707,.008,.738,{ic:.067,sk:.167}],120031:[.716,.037,1.013,{sk:.222}],120032:[.717,.017,.883,{sk:.278}],120033:[.717,.017,.85,{sk:.194}],120034:[.708,.036,.868,{ic:.148,sk:.333}],120035:[.735,.037,.747,{ic:.249,sk:.222}],120036:[.717,.017,.8,{ic:.16,sk:.25}],120037:[.717,.017,.622,{ic:.228,sk:.222}],120038:[.717,.017,.805,{ic:.221,sk:.25}],120039:[.717,.017,.944,{ic:.187,sk:.278}],120040:[.716,.017,.71,{ic:.249,sk:.194}],120041:[.717,.016,.821,{ic:.211,sk:.306}],120042:[.452,.008,.633],120043:[.694,.008,.521],120044:[.451,.008,.513,{sk:.0639}],120045:[.694,.008,.61,{sk:.192}],120046:[.452,.008,.554,{sk:.0639}],120047:[.701,.201,.568,{ic:.056,sk:.192}],120048:[.452,.202,.545,{sk:.0319}],120049:[.694,.008,.668,{sk:-.0319}],120050:[.694,.008,.405],120051:[.694,.202,.471],120052:[.694,.008,.604],120053:[.694,.008,.348,{sk:.0958}],120054:[.452,.008,1.032],120055:[.452,.008,.713],120056:[.452,.008,.585,{sk:.0639}],120057:[.452,.194,.601,{sk:.0958}],120058:[.452,.194,.542,{sk:.0958}],120059:[.452,.008,.529,{sk:.0639}],120060:[.451,.008,.531,{sk:.0639}],120061:[.643,.007,.415,{sk:.0958}],120062:[.452,.008,.681,{sk:.0319}],120063:[.453,.008,.567,{sk:.0319}],120064:[.453,.008,.831,{sk:.0958}],120065:[.452,.008,.659,{sk:.0319}],120066:[.452,.202,.59,{sk:.0639}],120067:[.452,.008,.555,{sk:.0639}],120068:[.696,.026,.718],120069:[.691,.027,.884],120070:[.685,.024,.613],120071:[.685,.027,.832],120072:[.685,.024,.663],120073:[.686,.153,.611],120074:[.69,.026,.785],120075:[.666,.133,.72],120076:[.686,.026,.554],120077:[.686,.139,.552],120078:[.68,.027,.668],120079:[.686,.026,.666],120080:[.692,.027,1.05],120081:[.686,.025,.832],120082:[.729,.027,.827],120083:[.692,.218,.828],120084:[.729,.069,.827],120085:[.686,.026,.828],120086:[.692,.027,.829],120087:[.701,.027,.669],120088:[.697,.027,.646],120089:[.686,.026,.831],120090:[.686,.027,1.046],120091:[.688,.027,.719],120092:[.686,.218,.833],120093:[.729,.139,.602],120094:[.47,.035,.5],120095:[.685,.031,.513],120096:[.466,.029,.389],120097:[.609,.033,.499],120098:[.467,.03,.401],120099:[.681,.221,.326],120100:[.47,.209,.504],120101:[.688,.205,.521],120102:[.673,.02,.279],120103:[.672,.208,.281],120104:[.689,.025,.389],120105:[.685,.02,.28],120106:[.475,.026,.767],120107:[.475,.022,.527],120108:[.48,.028,.489],120109:[.541,.212,.5],120110:[.479,.219,.489],120111:[.474,.021,.389],120112:[.478,.029,.443],120113:[.64,.02,.333],120114:[.474,.023,.517],120115:[.53,.028,.512],120116:[.532,.028,.774],120117:[.472,.188,.389],120118:[.528,.218,.499],120119:[.471,.214,.391],120120:[.701,0,.722],120121:[.683,0,.667],120122:[.702,.019,.722],120123:[.683,0,.722],120124:[.683,0,.667],120125:[.683,0,.611],120126:[.702,.019,.778],120127:[.683,0,.778],120128:[.683,0,.389],120129:[.683,.077,.5],120130:[.683,0,.778],120131:[.683,0,.667],120132:[.683,0,.944],120133:[.683,.02,.722],120134:[.701,.019,.778],120135:[.683,0,.611],120136:[.701,.181,.778],120137:[.683,0,.722],120138:[.702,.012,.556],120139:[.683,0,.667],120140:[.683,.019,.722],120141:[.683,.02,.722],120142:[.683,.019,1],120143:[.683,0,.722],120144:[.683,0,.722],120145:[.683,0,.667],120146:[.453,.006,.559],120147:[.694,.006,.639],120148:[.453,.006,.511],120149:[.694,.006,.639],120150:[.452,.006,.527],120151:[.7,0,.351,{ic:.101}],120152:[.455,.201,.575],120153:[.694,0,.639],120154:[.695,0,.319],120155:[.695,.2,.351],120156:[.683,0,.556],120157:[.694,0,.319],120158:[.45,0,.958],120159:[.45,0,.639],120160:[.452,.005,.575],120161:[.45,.194,.639],120162:[.4
5,.194,.607],120163:[.45,0,.474],120164:[.453,.006,.454],120165:[.635,.005,.447],120166:[.45,.006,.639],120167:[.444,0,.607],120168:[.444,0,.831],120169:[.444,0,.607],120170:[.444,.2,.607],120171:[.444,0,.511],120172:[.686,.031,.847],120173:[.684,.031,1.044],120174:[.676,.032,.723],120175:[.683,.029,.982],120176:[.686,.029,.783],120177:[.684,.146,.722],120178:[.687,.029,.927],120179:[.683,.126,.851],120180:[.681,.025,.655],120181:[.68,.141,.652],120182:[.681,.026,.789],120183:[.683,.028,.786],120184:[.683,.032,1.239],120185:[.679,.03,.983],120186:[.726,.03,.976],120187:[.688,.223,.977],120188:[.726,.083,.976],120189:[.688,.028,.978],120190:[.685,.031,.978],120191:[.686,.03,.79],120192:[.688,.039,.851],120193:[.685,.029,.982],120194:[.683,.03,1.235],120195:[.681,.035,.849],120196:[.688,.214,.984],120197:[.677,.148,.711],120198:[.472,.032,.603],120199:[.69,.032,.59],120200:[.473,.026,.464],120201:[.632,.028,.589],120202:[.471,.027,.472],120203:[.687,.222,.388],120204:[.472,.208,.595],120205:[.687,.207,.615],120206:[.686,.025,.331],120207:[.682,.203,.332],120208:[.682,.025,.464],120209:[.681,.024,.337],120210:[.476,.031,.921],120211:[.473,.028,.654],120212:[.482,.034,.609],120213:[.557,.207,.604],120214:[.485,.211,.596],120215:[.472,.026,.46],120216:[.479,.034,.523],120217:[.648,.027,.393],120218:[.472,.032,.589],120219:[.546,.027,.604],120220:[.549,.032,.918],120221:[.471,.188,.459],120222:[.557,.221,.589],120223:[.471,.214,.461],120224:[.694,0,.667],120225:[.694,0,.667],120226:[.705,.011,.639],120227:[.694,0,.722],120228:[.691,0,.597],120229:[.691,0,.569],120230:[.704,.011,.667],120231:[.694,0,.708],120232:[.694,0,.278],120233:[.694,.022,.472],120234:[.694,0,.694],120235:[.694,0,.542],120236:[.694,0,.875],120237:[.694,0,.708],120238:[.715,.022,.736],120239:[.694,0,.639],120240:[.715,.125,.736],120241:[.694,0,.646],120242:[.716,.022,.556],120243:[.688,0,.681],120244:[.694,.022,.688],120245:[.694,0,.667],120246:[.694,0,.944],120247:[.694,0,.667],120248:[.694,0,.667],120249:[.694,0,.611],120250:[.46,.01,.481],120251:[.694,.011,.517],120252:[.46,.01,.444],120253:[.694,.01,.517],120254:[.461,.01,.444],120255:[.705,0,.306],120256:[.455,.206,.5],120257:[.694,0,.517],120258:[.68,0,.239],120259:[.68,.205,.267],120260:[.694,0,.489],120261:[.694,0,.239],120262:[.455,0,.794],120263:[.455,0,.517],120264:[.46,.01,.5],120265:[.455,.194,.517],120266:[.455,.194,.517],120267:[.455,0,.342],120268:[.46,.01,.383],120269:[.571,.01,.361],120270:[.444,.01,.517],120271:[.444,0,.461],120272:[.444,0,.683],120273:[.444,0,.461],120274:[.444,.204,.461],120275:[.444,0,.435],120276:[.694,0,.733],120277:[.694,0,.733],120278:[.704,.011,.703],120279:[.694,0,.794],120280:[.691,0,.642],120281:[.691,0,.611],120282:[.705,.011,.733],120283:[.694,0,.794],120284:[.694,0,.331],120285:[.694,.022,.519],120286:[.694,0,.764],120287:[.694,0,.581],120288:[.694,0,.978],120289:[.694,0,.794],120290:[.716,.022,.794],120291:[.694,0,.703],120292:[.716,.106,.794],120293:[.694,0,.703],120294:[.716,.022,.611],120295:[.688,0,.733],120296:[.694,.022,.764],120297:[.694,0,.733],120298:[.694,0,1.039],120299:[.694,0,.733],120300:[.694,0,.733],120301:[.694,0,.672],120302:[.475,.011,.525],120303:[.694,.01,.561],120304:[.475,.011,.489],120305:[.694,.011,.561],120306:[.474,.01,.511],120307:[.705,0,.336],120308:[.469,.206,.55],120309:[.694,0,.561],120310:[.695,0,.256],120311:[.695,.205,.286],120312:[.694,0,.531],120313:[.694,0,.256],120314:[.469,0,.867],120315:[.468,0,.561],120316:[.474,.011,.55],120317:[.469,.194,.561],120318:[.469,.194,.561],120319:[.469,0,
.372],120320:[.474,.01,.422],120321:[.589,.01,.404],120322:[.458,.011,.561],120323:[.458,0,.5],120324:[.458,0,.744],120325:[.458,0,.5],120326:[.458,.205,.5],120327:[.458,0,.476],120328:[.694,0,.667],120329:[.694,0,.667],120330:[.705,.01,.639,{ic:.08}],120331:[.694,0,.722],120332:[.691,0,.597,{ic:.091}],120333:[.691,0,.569,{ic:.104}],120334:[.705,.011,.667,{ic:.063}],120335:[.694,0,.708,{ic:.06}],120336:[.694,0,.278,{ic:.06}],120337:[.694,.022,.472,{ic:.063}],120338:[.694,0,.694,{ic:.091}],120339:[.694,0,.542],120340:[.694,0,.875,{ic:.054}],120341:[.694,0,.708,{ic:.058}],120342:[.716,.022,.736],120343:[.694,0,.639,{ic:.051}],120344:[.716,.125,.736],120345:[.694,0,.646,{ic:.052}],120346:[.716,.022,.556,{ic:.053}],120347:[.688,0,.681,{ic:.109}],120348:[.694,.022,.688,{ic:.059}],120349:[.694,0,.667,{ic:.132}],120350:[.694,0,.944,{ic:.132}],120351:[.694,0,.667,{ic:.091}],120352:[.694,0,.667,{ic:.143}],120353:[.694,0,.611,{ic:.091}],120354:[.461,.01,.481],120355:[.694,.011,.517],120356:[.46,.011,.444,{ic:.055}],120357:[.694,.01,.517,{ic:.071}],120358:[.46,.011,.444],120359:[.705,0,.306,{ic:.188}],120360:[.455,.206,.5,{ic:.068}],120361:[.694,0,.517],120362:[.68,0,.239,{ic:.076}],120363:[.68,.204,.267,{ic:.069}],120364:[.694,0,.489,{ic:.054}],120365:[.694,0,.239,{ic:.072}],120366:[.455,0,.794],120367:[.454,0,.517],120368:[.461,.011,.5],120369:[.455,.194,.517],120370:[.455,.194,.517],120371:[.455,0,.342,{ic:.082}],120372:[.461,.011,.383,{ic:.053}],120373:[.571,.011,.361],120374:[.444,.01,.517],120375:[.444,0,.461,{ic:.079}],120376:[.444,0,.683,{ic:.079}],120377:[.444,0,.461,{ic:.076}],120378:[.444,.205,.461,{ic:.079}],120379:[.444,0,.435,{ic:.059}],120380:[.694,0,.667],120381:[.694,0,.667],120382:[.705,.01,.639,{ic:.08}],120383:[.694,0,.722],120384:[.691,0,.597,{ic:.091}],120385:[.691,0,.569,{ic:.104}],120386:[.705,.011,.667,{ic:.063}],120387:[.694,0,.708,{ic:.06}],120388:[.694,0,.278,{ic:.06}],120389:[.694,.022,.472,{ic:.063}],120390:[.694,0,.694,{ic:.091}],120391:[.694,0,.542],120392:[.694,0,.875,{ic:.054}],120393:[.694,0,.708,{ic:.058}],120394:[.716,.022,.736],120395:[.694,0,.639,{ic:.051}],120396:[.716,.125,.736],120397:[.694,0,.646,{ic:.052}],120398:[.716,.022,.556,{ic:.053}],120399:[.688,0,.681,{ic:.109}],120400:[.694,.022,.688,{ic:.059}],120401:[.694,0,.667,{ic:.132}],120402:[.694,0,.944,{ic:.132}],120403:[.694,0,.667,{ic:.091}],120404:[.694,0,.667,{ic:.143}],120405:[.694,0,.611,{ic:.091}],120406:[.461,.01,.481],120407:[.694,.011,.517],120408:[.46,.011,.444,{ic:.055}],120409:[.694,.01,.517,{ic:.071}],120410:[.46,.011,.444],120411:[.705,0,.306,{ic:.188}],120412:[.455,.206,.5,{ic:.068}],120413:[.694,0,.517],120414:[.68,0,.239,{ic:.076}],120415:[.68,.204,.267,{ic:.069}],120416:[.694,0,.489,{ic:.054}],120417:[.694,0,.239,{ic:.072}],120418:[.455,0,.794],120419:[.454,0,.517],120420:[.461,.011,.5],120421:[.455,.194,.517],120422:[.455,.194,.517],120423:[.455,0,.342,{ic:.082}],120424:[.461,.011,.383,{ic:.053}],120425:[.571,.011,.361],120426:[.444,.01,.517],120427:[.444,0,.461,{ic:.079}],120428:[.444,0,.683,{ic:.079}],120429:[.444,0,.461,{ic:.076}],120430:[.444,.205,.461,{ic:.079}],120431:[.444,0,.435,{ic:.059}],120432:[.623,0,.525],120433:[.611,0,.525],120434:[.622,.011,.525],120435:[.611,0,.525],120436:[.611,0,.525],120437:[.611,0,.525],120438:[.622,.011,.525],120439:[.611,0,.525],120440:[.611,0,.525],120441:[.611,.011,.525],120442:[.611,0,.525],120443:[.611,0,.525],120444:[.611,0,.525],120445:[.611,0,.525],120446:[.621,.01,.525],120447:[.611,0,.525],120448:[.621,.138,.525],120449:[.611,.011,.525],12
0450:[.622,.011,.525],120451:[.611,0,.525],120452:[.611,.011,.525],120453:[.611,.007,.525],120454:[.611,.007,.525],120455:[.611,0,.525],120456:[.611,0,.525],120457:[.611,0,.525],120458:[.439,.006,.525],120459:[.611,.006,.525],120460:[.44,.006,.525],120461:[.611,.006,.525],120462:[.44,.006,.525],120463:[.617,0,.525],120464:[.442,.229,.525],120465:[.611,0,.525],120466:[.612,0,.525],120467:[.612,.228,.525],120468:[.611,0,.525],120469:[.611,0,.525],120470:[.436,0,.525],120471:[.436,0,.525],120472:[.44,.006,.525],120473:[.437,.221,.525],120474:[.437,.221,.525],120475:[.437,0,.525],120476:[.44,.006,.525],120477:[.554,.006,.525],120478:[.431,.005,.525],120479:[.431,0,.525],120480:[.431,0,.525],120481:[.431,0,.525],120482:[.431,.228,.525],120483:[.431,0,.525],120484:[.441,.01,.307],120485:[.442,.204,.332],120488:[.698,0,.869],120489:[.686,0,.818],120490:[.68,0,.692],120491:[.698,0,.958],120492:[.68,0,.756],120493:[.686,0,.703],120494:[.686,0,.9],120495:[.696,.01,.894],120496:[.686,0,.436],120497:[.686,0,.901],120498:[.698,0,.806],120499:[.686,0,1.092],120500:[.686,0,.9],120501:[.675,0,.767],120502:[.696,.01,.864],120503:[.68,0,.9],120504:[.686,0,.786],120505:[.696,.01,.894],120506:[.686,0,.831],120507:[.675,0,.8],120508:[.697,0,.894],120509:[.686,0,.831],120510:[.686,0,.869],120511:[.686,0,.894],120512:[.696,0,.831],120513:[.686,.024,.958],120514:[.452,.008,.761,{sk:.0319}],120515:[.701,.194,.66,{sk:.0958}],120516:[.451,.211,.59],120517:[.725,.008,.522,{sk:.0639}],120518:[.461,.017,.529,{sk:.0958}],120519:[.711,.202,.508,{sk:.0958}],120520:[.452,.211,.6,{sk:.0639}],120521:[.702,.008,.562,{sk:.0958}],120522:[.452,.008,.412,{sk:.0639}],120523:[.452,.008,.668],120524:[.694,.013,.671],120525:[.452,.211,.708,{sk:.0319}],120526:[.452,0,.577,{sk:.0319}],120527:[.711,.201,.508,{sk:.128}],120528:[.452,.008,.585,{sk:.0639}],120529:[.444,.008,.682],120530:[.451,.211,.612,{sk:.0958}],120531:[.451,.105,.424,{sk:.0958}],120532:[.444,.008,.686],120533:[.444,.013,.521,{ic:.089,sk:.0319}],120534:[.453,.008,.631,{sk:.0319}],120535:[.452,.216,.747,{sk:.0958}],120536:[.452,.201,.718,{sk:.0639}],120537:[.694,.202,.758,{sk:.128}],120538:[.453,.008,.718],120539:[.71,.017,.628,{sk:.0958}],120540:[.444,.007,.483,{sk:.0639}],120541:[.701,.008,.692,{sk:.0958}],120542:[.434,.006,.667,{ic:.067}],120543:[.694,.202,.712,{sk:.0958}],120544:[.451,.194,.612,{sk:.0958}],120545:[.444,.008,.975],120546:[.716,0,.75,{sk:.139}],120547:[.683,0,.759,{sk:.0833}],120548:[.68,0,.615,{ic:.106,sk:.0833}],120549:[.716,0,.833,{sk:.167}],120550:[.68,0,.738,{sk:.0833}],120551:[.683,0,.683,{sk:.0833}],120552:[.683,0,.831,{ic:.057,sk:.0556}],120553:[.704,.022,.763,{sk:.0833}],120554:[.683,0,.44,{ic:.064,sk:.111}],120555:[.683,0,.849,{sk:.0556}],120556:[.716,0,.694,{sk:.167}],120557:[.683,0,.97,{ic:.081,sk:.0833}],120558:[.683,0,.803,{ic:.085,sk:.0833}],120559:[.677,0,.742,{sk:.0833}],120560:[.704,.022,.763,{sk:.0833}],120561:[.68,0,.831,{ic:.056,sk:.0556}],120562:[.683,0,.642,{ic:.109,sk:.0833}],120563:[.704,.022,.763,{sk:.0833}],120564:[.683,0,.78,{sk:.0833}],120565:[.677,0,.584,{ic:.12,sk:.0833}],120566:[.705,0,.583,{ic:.117,sk:.0556}],120567:[.683,0,.667,{sk:.0833}],120568:[.683,0,.828,{sk:.0833}],120569:[.683,0,.612,{ic:.08,sk:.0556}],120570:[.704,0,.772,{sk:.0833}],120571:[.683,.033,.833],120572:[.442,.011,.64,{sk:.0278}],120573:[.705,.194,.566,{sk:.0833}],120574:[.441,.216,.518],120575:[.717,.01,.444,{sk:.0556}],120576:[.452,.022,.466,{sk:.0833}],120577:[.704,.204,.438,{sk:.0833}],120578:[.442,.216,.497,{sk:.0556}],120579:[.705,.01,.469,{sk:.08
33}],120580:[.442,.01,.354,{sk:.0556}],120581:[.442,.011,.576],120582:[.694,.012,.583],120583:[.442,.216,.603,{sk:.0278}],120584:[.442,0,.494,{sk:.0278}],120585:[.704,.205,.438,{sk:.111}],120586:[.441,.011,.485,{sk:.0556}],120587:[.431,.011,.57],120588:[.442,.216,.517,{sk:.0833}],120589:[.442,.107,.363,{sk:.0833}],120590:[.431,.011,.571],120591:[.431,.013,.437,{ic:.08,sk:.0278}],120592:[.443,.01,.54,{sk:.0278}],120593:[.442,.218,.654,{sk:.0833}],120594:[.442,.204,.626,{sk:.0556}],120595:[.694,.205,.651,{sk:.111}],120596:[.443,.011,.622],120597:[.715,.022,.531,{sk:.0833}],120598:[.431,.011,.406,{sk:.0556}],120599:[.705,.011,.591,{sk:.0833}],120600:[.434,.006,.667,{ic:.067}],120601:[.694,.205,.596,{sk:.0833}],120602:[.442,.194,.517,{sk:.0833}],120603:[.431,.01,.828],120604:[.711,0,.869,{sk:.16}],120605:[.686,0,.866,{sk:.0958}],120606:[.68,0,.657,{ic:.12,sk:.0958}],120607:[.711,0,.958,{sk:.192}],120608:[.68,0,.81,{sk:.0958}],120609:[.686,0,.773,{sk:.0958}],120610:[.686,0,.982,{sk:.0639}],120611:[.702,.017,.867,{sk:.0958}],120612:[.686,0,.511,{ic:.062,sk:.128}],120613:[.686,0,.971,{sk:.0639}],120614:[.711,0,.806,{sk:.192}],120615:[.686,0,1.142,{ic:.077,sk:.0958}],120616:[.686,0,.95,{ic:.077,sk:.0958}],120617:[.675,0,.841,{sk:.0958}],120618:[.703,.017,.837,{sk:.0958}],120619:[.68,0,.982,{sk:.0639}],120620:[.686,0,.723,{ic:.124,sk:.0958}],120621:[.702,.017,.867,{sk:.0958}],120622:[.686,0,.885,{sk:.0958}],120623:[.675,0,.637,{ic:.135,sk:.0958}],120624:[.703,0,.671,{ic:.131,sk:.0639}],120625:[.686,0,.767,{sk:.0958}],120626:[.686,0,.947,{sk:.0958}],120627:[.686,0,.714,{ic:.076,sk:.0639}],120628:[.703,0,.879,{sk:.0958}],120629:[.683,.033,.833],120630:[.452,.008,.761,{sk:.0319}],120631:[.701,.194,.66,{sk:.0958}],120632:[.451,.211,.59],120633:[.725,.008,.522,{sk:.0639}],120634:[.461,.017,.529,{sk:.0958}],120635:[.711,.202,.508,{sk:.0958}],120636:[.452,.211,.6,{sk:.0639}],120637:[.702,.008,.562,{sk:.0958}],120638:[.452,.008,.412,{sk:.0639}],120639:[.452,.008,.668],120640:[.694,.013,.671],120641:[.452,.211,.708,{sk:.0319}],120642:[.452,0,.577,{sk:.0319}],120643:[.711,.201,.508,{sk:.128}],120644:[.452,.008,.585,{sk:.0639}],120645:[.444,.008,.682],120646:[.451,.211,.612,{sk:.0958}],120647:[.451,.105,.424,{sk:.0958}],120648:[.444,.008,.686],120649:[.444,.013,.521,{ic:.089,sk:.0319}],120650:[.453,.008,.631,{sk:.0319}],120651:[.452,.216,.747,{sk:.0958}],120652:[.452,.201,.718,{sk:.0639}],120653:[.694,.202,.758,{sk:.128}],120654:[.453,.008,.718],120655:[.715,.022,.531,{sk:.0833}],120656:[.444,.007,.483,{sk:.0639}],120657:[.701,.008,.692,{sk:.0958}],120658:[.434,.006,.667,{ic:.067}],120659:[.694,.202,.712,{sk:.0958}],120660:[.451,.194,.612,{sk:.0958}],120661:[.444,.008,.975],120662:[.694,0,.733],120663:[.694,0,.733],120664:[.691,0,.581],120665:[.694,0,.917],120666:[.691,0,.642],120667:[.694,0,.672],120668:[.694,0,.794],120669:[.716,.022,.856],120670:[.694,0,.331],120671:[.694,0,.764],120672:[.694,0,.672],120673:[.694,0,.978],120674:[.694,0,.794],120675:[.688,0,.733],120676:[.716,.022,.794],120677:[.691,0,.794],120678:[.694,0,.703],120679:[.716,.022,.856],120680:[.694,0,.794],120681:[.688,0,.733],120682:[.715,0,.856],120683:[.694,0,.794],120684:[.694,0,.733],120685:[.694,0,.856],120686:[.716,0,.794],120687:[.683,.033,.833],120688:[.452,.008,.761,{sk:.0319}],120689:[.701,.194,.66,{sk:.0958}],120690:[.451,.211,.59],120691:[.725,.008,.522,{sk:.0639}],120692:[.461,.017,.529,{sk:.0958}],120693:[.711,.202,.508,{sk:.0958}],120694:[.452,.211,.6,{sk:.0639}],120695:[.702,.008,.562,{sk:.0958}],120696:[.452,.008,.412,{sk:.0
639}],120697:[.452,.008,.668],120698:[.694,.013,.671],120699:[.452,.211,.708,{sk:.0319}],120700:[.452,0,.577,{sk:.0319}],120701:[.711,.201,.508,{sk:.128}],120702:[.452,.008,.585,{sk:.0639}],120703:[.444,.008,.682],120704:[.451,.211,.612,{sk:.0958}],120705:[.451,.105,.424,{sk:.0958}],120706:[.444,.008,.686],120707:[.444,.013,.521,{ic:.089,sk:.0319}],120708:[.453,.008,.631,{sk:.0319}],120709:[.452,.216,.747,{sk:.0958}],120710:[.452,.201,.718,{sk:.0639}],120711:[.694,.202,.758,{sk:.128}],120712:[.453,.008,.718],120713:[.715,.022,.531,{sk:.0833}],120714:[.444,.007,.483,{sk:.0639}],120715:[.701,.008,.692,{sk:.0958}],120716:[.434,.006,.667,{ic:.067}],120717:[.694,.202,.712,{sk:.0958}],120718:[.451,.194,.612,{sk:.0958}],120719:[.444,.008,.975],120720:[.694,0,.667],120721:[.694,0,.667],120722:[.691,0,.542,{ic:.104}],120723:[.694,0,.833],120724:[.691,0,.597,{ic:.091}],120725:[.694,0,.611,{ic:.091}],120726:[.694,0,.708,{ic:.06}],120727:[.715,.022,.778],120728:[.694,0,.278,{ic:.06}],120729:[.694,0,.694,{ic:.091}],120730:[.694,0,.611],120731:[.694,0,.875,{ic:.054}],120732:[.694,0,.708,{ic:.058}],120733:[.688,0,.667,{ic:.098}],120734:[.716,.022,.736],120735:[.691,0,.708,{ic:.06}],120736:[.694,0,.639,{ic:.051}],120737:[.715,.022,.778],120738:[.694,0,.722,{ic:.091}],120739:[.688,0,.681,{ic:.109}],120740:[.716,0,.778,{ic:.065}],120741:[.694,0,.722],120742:[.694,0,.667,{ic:.091}],120743:[.694,0,.778,{ic:.076}],120744:[.716,0,.722],120745:[.683,.033,.833],120746:[.452,.008,.761,{sk:.0319}],120747:[.701,.194,.66,{sk:.0958}],120748:[.451,.211,.59],120749:[.725,.008,.522,{sk:.0639}],120750:[.461,.017,.529,{sk:.0958}],120751:[.711,.202,.508,{sk:.0958}],120752:[.452,.211,.6,{sk:.0639}],120753:[.702,.008,.562,{sk:.0958}],120754:[.452,.008,.412,{sk:.0639}],120755:[.452,.008,.668],120756:[.694,.013,.671],120757:[.452,.211,.708,{sk:.0319}],120758:[.452,0,.577,{sk:.0319}],120759:[.711,.201,.508,{sk:.128}],120760:[.452,.008,.585,{sk:.0639}],120761:[.444,.008,.682],120762:[.451,.211,.612,{sk:.0958}],120763:[.451,.105,.424,{sk:.0958}],120764:[.444,.008,.686],120765:[.444,.013,.521,{ic:.089,sk:.0319}],120766:[.453,.008,.631,{sk:.0319}],120767:[.452,.216,.747,{sk:.0958}],120768:[.452,.201,.718,{sk:.0639}],120769:[.694,.202,.758,{sk:.128}],120770:[.453,.008,.718],120771:[.715,.022,.531,{sk:.0833}],120772:[.444,.007,.483,{sk:.0639}],120773:[.701,.008,.692,{sk:.0958}],120774:[.434,.006,.667,{ic:.067}],120775:[.694,.202,.712,{sk:.0958}],120776:[.451,.194,.612,{sk:.0958}],120777:[.444,.008,.975],120778:[.68,0,.643,{ic:.106,sk:.0833}],120779:[.605,.085,.778],120782:[.654,.01,.575],120783:[.655,0,.575],120784:[.654,0,.575],120785:[.655,.011,.575],120786:[.656,0,.575],120787:[.655,.011,.575],120788:[.655,.011,.575],120789:[.676,.011,.575],120790:[.654,.011,.575],120791:[.654,.011,.575],120792:[.654,.01,.575],120793:[.655,0,.575],120794:[.654,0,.575],120795:[.655,.011,.575],120796:[.656,0,.575],120797:[.655,.011,.575],120798:[.655,.011,.575],120799:[.676,.011,.575],120800:[.654,.011,.575],120801:[.654,.011,.575],120802:[.678,.022,.5],120803:[.678,0,.5],120804:[.677,0,.5],120805:[.678,.022,.5],120806:[.656,0,.5],120807:[.656,.021,.5],120808:[.677,.022,.5],120809:[.656,.011,.5],120810:[.678,.022,.5],120811:[.677,.022,.5],120812:[.715,.022,.55],120813:[.716,0,.55],120814:[.716,0,.55],120815:[.716,.022,.55],120816:[.694,0,.55],120817:[.694,.022,.55],120818:[.716,.022,.55],120819:[.695,.011,.55],120820:[.715,.022,.55],120821:[.716,.022,.55],120822:[.621,.01,.525],120823:[.622,0,.525],120824:[.622,0,.525],120825:[.622,.011,.525],120826:[
.624,0,.525],120827:[.611,.01,.525],120828:[.622,.011,.525],120829:[.627,.01,.525],120830:[.621,.01,.525],120831:[.622,.011,.525]}},function(t,e,r){"use strict";Object.defineProperty(e,"__esModule",{value:!0});var n=r(1),i=r(195);e.sansSerifBoldItalic=n.AddCSS(i.sansSerifBoldItalic,{32:{c:" "},33:{c:"!"},35:{c:"#"},36:{c:"$"},37:{c:"%"},38:{c:"&"},40:{c:"("},41:{c:")"},42:{c:"*"},43:{c:"+"},44:{c:","},45:{c:"-"},46:{c:"."},47:{c:"/"},48:{c:"0"},49:{c:"1"},50:{c:"2"},51:{c:"3"},52:{c:"4"},53:{c:"5"},54:{c:"6"},55:{c:"7"},56:{c:"8"},57:{c:"9"},58:{c:":"},59:{c:";"},61:{c:"="},63:{c:"?"},64:{c:"@"},65:{c:"A"},66:{c:"B"},67:{c:"C"},68:{c:"D"},69:{c:"E"},70:{c:"F"},71:{c:"G"},72:{c:"H"},73:{c:"I"},74:{c:"J"},75:{c:"K"},76:{c:"L"},77:{c:"M"},78:{c:"N"},79:{c:"O"},80:{c:"P"},81:{c:"Q"},82:{c:"R"},83:{c:"S"},84:{c:"T"},85:{c:"U"},86:{c:"V"},87:{c:"W"},88:{c:"X"},89:{c:"Y"},90:{c:"Z"},91:{c:"["},93:{c:"]"},94:{c:"^"},95:{c:"_"},97:{c:"a"},98:{c:"b"},99:{c:"c"},100:{c:"d"},101:{c:"e"},102:{c:"f"},103:{c:"g"},104:{c:"h"},105:{c:"i"},106:{c:"j"},107:{c:"k"},108:{c:"l"},109:{c:"m"},110:{c:"n"},111:{c:"o"},112:{c:"p"},113:{c:"q"},114:{c:"r"},115:{c:"s"},116:{c:"t"},117:{c:"u"},118:{c:"v"},119:{c:"w"},120:{c:"x"},121:{c:"y"},122:{c:"z"},126:{c:"~"},913:{c:"A"},914:{c:"B"},917:{c:"E"},918:{c:"Z"},919:{c:"H"},921:{c:"I"},922:{c:"K"},924:{c:"M"},925:{c:"N"},927:{c:"O"},929:{c:"P"},930:{c:"\\398"},932:{c:"T"},935:{c:"X"},978:{c:"\\3A5"},988:{c:"F"},8213:{c:"\\2014"},8215:{c:"_"},8260:{c:"/"},8710:{c:"\\394"}})},function(t,e,r){"use strict";Object.defineProperty(e,"__esModule",{value:!0}),e.sansSerifBoldItalic={32:[0,0,.25],33:[.694,0,.319],34:[.694,-.471,.5],35:[.694,.194,.833],36:[.75,.056,.5,{ic:.065}],37:[.75,.056,.833],38:[.716,.022,.758],39:[.694,-.471,.278,{ic:.057}],40:[.75,.25,.389,{ic:.102}],41:[.75,.25,.389],42:[.75,-.306,.5,{ic:.068}],43:[.583,.083,.778],44:[.098,.125,.278],45:[.259,-.186,.333],46:[.098,0,.278],47:[.75,.25,.5,{ic:.1}],48:[.678,.022,.5],49:[.678,0,.5],50:[.678,0,.5,{ic:.051}],51:[.678,.022,.5],52:[.656,0,.5],53:[.656,.022,.5,{ic:.055}],54:[.678,.022,.5],55:[.656,.011,.5,{ic:.096}],56:[.678,.022,.5,{ic:.054}],57:[.677,.022,.5],58:[.444,0,.278],59:[.444,.125,.278],61:[.37,-.13,.778],63:[.704,0,.472,{ic:.064}],64:[.705,.01,.667],65:[.694,0,.667],66:[.694,0,.667],67:[.705,.01,.639,{ic:.08}],68:[.694,0,.722],69:[.691,0,.597,{ic:.091}],70:[.691,0,.569,{ic:.104}],71:[.705,.011,.667,{ic:.063}],72:[.694,0,.708,{ic:.06}],73:[.694,0,.278,{ic:.06}],74:[.694,.022,.472,{ic:.063}],75:[.694,0,.694,{ic:.091}],76:[.694,0,.542],77:[.694,0,.875,{ic:.054}],78:[.694,0,.708,{ic:.058}],79:[.716,.022,.736],80:[.694,0,.639,{ic:.051}],81:[.716,.125,.736],82:[.694,0,.646,{ic:.052}],83:[.716,.022,.556,{ic:.053}],84:[.688,0,.681,{ic:.109}],85:[.694,.022,.688,{ic:.059}],86:[.694,0,.667,{ic:.132}],87:[.694,0,.944,{ic:.132}],88:[.694,0,.667,{ic:.091}],89:[.694,0,.667,{ic:.143}],90:[.694,0,.611,{ic:.091}],91:[.75,.25,.289,{ic:.136}],93:[.75,.25,.289,{ic:.064}],94:[.694,-.527,.5],95:[-.038,.114,.5,{ic:.065}],97:[.461,.01,.481],98:[.694,.011,.517],99:[.46,.011,.444,{ic:.055}],100:[.694,.01,.517,{ic:.071}],101:[.46,.011,.444],102:[.705,0,.306,{ic:.188}],103:[.455,.206,.5,{ic:.068}],104:[.694,0,.517],105:[.68,0,.239,{ic:.076}],106:[.68,.204,.267,{ic:.069}],107:[.694,0,.489,{ic:.054}],108:[.694,0,.239,{ic:.072}],109:[.455,0,.794],110:[.454,0,.517],111:[.461,.011,.5],112:[.455,.194,.517],113:[.455,.194,.517],114:[.455,0,.342,{ic:.082}],115:[.461,.011,.383,{ic:.053}],116:[.571,.011,.361],117:[.444,.01,.517],118:[.444,0,.46
Promise(function(t,e){var r=i.document;if(r&&r.readyState&&"complete"!==r.readyState&&"interactive"!==r.readyState){var n=function(){return t()};r.defaultView.addEventListener("load",n,!0),r.defaultView.addEventListener("DOMContentLoaded",n,!0)}else t()}),c.toMML=u,c.registerConstructor=function(t,e){c.constructors[t]=e},c.useHandler=function(t,e){void 0===e&&(e=!1),S.CONFIG.handler&&!e||(S.CONFIG.handler=t)},c.useAdaptor=function(t,e){void 0===e&&(e=!1),S.CONFIG.adaptor&&!e||(S.CONFIG.adaptor=t)},c.useInput=function(t,e){void 0===e&&(e=!1),x&&!e||S.CONFIG.input.push(t)},c.useOutput=function(t,e){void 0===e&&(e=!1),S.CONFIG.output&&!e||(S.CONFIG.output=t)},c.extendHandler=function(t,e){void 0===e&&(e=10),l.add(t,e)},c.defaultReady=function(){h(),f(),c.promise=c.promise.then(function(){return S.CONFIG.pageReady()})},c.defaultPageReady=function(){return S.CONFIG.typeset&&S.MathJax.typesetPromise?S.MathJax.typesetPromise():null},c.getComponents=h,c.makeMethods=f,c.makeTypesetMethods=p,c.makeOutputMethods=d,c.makeMmlMethods=m,c.makeResetMethod=y,c.getInputJax=v,c.getOutputJax=b,c.getAdaptor=g,c.getHandler=M,c.getDocument=O,S.MathJax=n.MathJax,void 0===S.MathJax._.startup&&(n.combineDefaults(S.MathJax.config,"startup",{input:[],output:"",handler:null,adaptor:null,document:"undefined"==typeof document?"":document,elements:null,typeset:!0,ready:t.defaultReady.bind(t),pageReady:t.defaultPageReady.bind(t)}),n.combineWithMathJax({startup:t,options:{}})),S.CONFIG=S.MathJax.config.startup;var x=0!==S.CONFIG.input.length}).call(this,E(28))},function(t,e,r){"use strict";r(17).Loader.preLoad("loader","startup","core","input/tex","input/mml","output/chtml","output/chtml/fonts/tex.js","ui/menu")},function(t,e,r){"use strict";r(234);var n=r(70),i=r(81);MathJax.startup&&(MathJax.startup.registerConstructor("HTMLHandler",n.HTMLHandler),MathJax.startup.registerConstructor("browserAdaptor",i.browserAdaptor),MathJax.startup.useHandler("HTMLHandler"),MathJax.startup.useAdaptor("browserAdaptor")),MathJax.loader&&(MathJax._.mathjax.mathjax.asyncLoad=function(t){return MathJax.loader.load(t)})},function(t,e,r){"use strict";var n=r(5),i=Ct(n),o=Ct(r(79)),a=Ct(r(81)),s=Ct(r(80)),c=Ct(r(40)),l=Ct(r(82)),u=Ct(r(94)),h=Ct(r(29)),f=Ct(r(41)),p=Ct(r(13)),d=Ct(r(43)),m=Ct(r(19)),y=Ct(r(85)),v=Ct(r(235)),b=Ct(r(44)),g=Ct(r(0)),M=Ct(r(67)),O=Ct(r(59)),x=Ct(r(90)),S=Ct(r(91)),E=Ct(r(46)),C=Ct(r(92)),_=Ct(r(58)),T=Ct(r(88)),w=Ct(r(57)),A=Ct(r(53)),k=Ct(r(65)),I=Ct(r(47)),L=Ct(r(61)),N=Ct(r(48)),P=Ct(r(26)),B=Ct(r(56)),R=Ct(r(89)),j=Ct(r(55)),H=Ct(r(52)),D=Ct(r(51)),X=Ct(r(50)),F=Ct(r(54)),W=Ct(r(87)),J=Ct(r(31)),q=Ct(r(62)),V=Ct(r(64)),U=Ct(r(49)),z=Ct(r(63)),G=Ct(r(60)),K=Ct(r(66)),Z=Ct(r(68)),Y=Ct(r(86)),$=Ct(r(96)),Q=Ct(r(42)),tt=Ct(r(30)),et=Ct(r(45)),rt=Ct(r(84)),nt=Ct(r(95)),it=Ct(r(97)),ot=Ct(r(98)),at=Ct(r(236)),st=Ct(r(99)),ct=Ct(r(102)),lt=Ct(r(70)),ut=Ct(r(100)),ht=Ct(r(101)),ft=Ct(r(20)),pt=Ct(r(103)),dt=Ct(r(93)),mt=Ct(r(12)),yt=Ct(r(25)),vt=Ct(r(83)),bt=Ct(r(3)),gt=Ct(r(24)),Mt=Ct(r(69)),Ot=Ct(r(71)),xt=Ct(r(14)),St=Ct(r(104)),Et=Ct(r(10));function Ct(t){if(t&&t.__esModule)return t;var e={};if(null!=t)for(var r in t)Object.prototype.hasOwnProperty.call(t,r)&&(e[r]=t[r]);return 
e.default=t,e}(0,n.combineWithMathJax)({_:{adaptors:{HTMLAdaptor:o,browserAdaptor:a},components:{global:i},core:{DOMAdaptor:s,FindMath:c,Handler:l,HandlerList:u,InputJax:h,MathDocument:f,MathItem:p,MathList:d,MmlTree:{Attributes:m,MML:y,MathMLVisitor:v,MmlFactory:b,MmlNode:g,MmlNodes:{TeXAtom:M,maction:O,maligngroup:x,malignmark:S,math:E,mathchoice:C,menclose:_,merror:T,mfenced:w,mfrac:A,mglyph:k,mi:I,mmultiscripts:L,mn:N,mo:P,mpadded:B,mphantom:R,mroot:j,mrow:H,ms:D,mspace:X,msqrt:F,mstyle:W,msubsup:J,mtable:q,mtd:V,mtext:U,mtr:z,munderover:G,semantics:K},MmlVisitor:Z,OperatorDictionary:Y,SerializedMmlVisitor:$},OutputJax:Q,Tree:{Factory:tt,Node:et,NodeFactory:rt,Visitor:nt,Wrapper:it,WrapperFactory:ot}},handlers:{html_ts:at,html:{HTMLDocument:st,HTMLDomStrings:ct,HTMLHandler:lt,HTMLMathItem:ut,HTMLMathList:ht}},mathjax:ft,util:{AsyncLoad:pt,BitField:dt,Entities:mt,FunctionList:yt,LinkedList:vt,Options:bt,PrioritizedList:gt,Retries:Mt,Styles:Ot,lengths:xt,numeric:St,string:Et}}})},function(t,e,r){"use strict";var n,i=this&&this.__extends||(n=function(t,e){return(n=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(t,e){t.__proto__=e}||function(t,e){for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r])})(t,e)},function(t,e){function r(){this.constructor=t}n(t,e),t.prototype=null===e?Object.create(e):(r.prototype=e.prototype,new r)}),l=this&&this.__values||function(t){var e="function"==typeof Symbol&&Symbol.iterator,r=e&&t[e],n=0;if(r)return r.call(t);if(t&&"number"==typeof t.length)return{next:function(){return t&&n>=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var o,a=r(68),s=(o=a.MmlVisitor,i(c,o),c.prototype.visitTree=function(t,e){var r=(this.document=e).createElement("top");return this.visitNode(t,r),this.document=null,r.firstChild},c.prototype.visitTextNode=function(t,e){e.appendChild(this.document.createTextNode(t.getText()))},c.prototype.visitXMLNode=function(t,e){e.appendChild(t.getXML().cloneNode(!0))},c.prototype.visitInferredMrowNode=function(t,e){var r,n;try{for(var i=l(t.childNodes),o=i.next();!o.done;o=i.next()){var a=o.value;this.visitNode(a,e)}}catch(t){r={error:t}}finally{try{o&&!o.done&&(n=i.return)&&n.call(i)}finally{if(r)throw r.error}}},c.prototype.visitDefault=function(t,e){var r,n,i=this.document.createElement(t.kind);this.addAttributes(t,i);try{for(var o=l(t.childNodes),a=o.next();!a.done;a=o.next()){var s=a.value;this.visitNode(s,i)}}catch(t){r={error:t}}finally{try{a&&!a.done&&(n=o.return)&&n.call(o)}finally{if(r)throw r.error}}e.appendChild(i)},c.prototype.addAttributes=function(t,e){var r,n,i=t.attributes,o=i.getExplicitNames();try{for(var a=l(o),s=a.next();!s.done;s=a.next()){var c=s.value;e.setAttribute(c,i.getExplicit(c).toString())}}catch(t){r={error:t}}finally{try{s&&!s.done&&(n=a.return)&&n.call(a)}finally{if(r)throw r.error}}},c);function c(){var t=null!==o&&o.apply(this,arguments)||this;return t.document=null,t}e.MathMLVisitor=s},function(t,e,r){"use strict";Object.defineProperty(e,"__esModule",{value:!0});var n=r(20),i=r(70);e.RegisterHTMLHandler=function(t){var e=new i.HTMLHandler(t);return n.mathjax.handlers.register(e),e}},function(t,e,r){"use strict";r(238);var 
n=r(249);r(17).Loader.preLoad("input/tex-base","[tex]/ams","[tex]/newcommand","[tex]/noundefined","[tex]/require","[tex]/autoload","[tex]/configMacros"),(0,n.registerTeX)(["base","ams","newcommand","noundefined","require","autoload","configMacros"])},function(t,e,r){"use strict";var n=r(5),i=j(r(105)),o=j(r(11)),a=j(r(107)),s=j(r(106)),c=j(r(8)),l=j(r(112)),u=j(r(6)),h=j(r(33)),f=j(r(110)),p=j(r(7)),d=j(r(109)),m=j(r(32)),y=j(r(111)),v=j(r(22)),b=j(r(9)),g=j(r(27)),M=j(r(15)),O=j(r(4)),x=j(r(21)),S=j(r(242)),E=j(r(114)),C=j(r(115)),_=j(r(244)),T=j(r(113)),w=j(r(34)),A=j(r(35)),k=j(r(245)),I=j(r(246)),L=j(r(118)),N=j(r(72)),P=j(r(117)),B=j(r(248)),R=j(r(116));function j(t){if(t&&t.__esModule)return t;var e={};if(null!=t)for(var r in t)Object.prototype.hasOwnProperty.call(t,r)&&(e[r]=t[r]);return e.default=t,e}(0,n.combineWithMathJax)({_:{input:{tex_ts:i,tex:{Configuration:o,FilterUtil:a,FindTeX:s,MapHandler:c,NodeFactory:l,NodeUtil:u,ParseMethods:h,ParseOptions:f,ParseUtil:p,Stack:d,StackItem:m,StackItemFactory:y,Symbol:v,SymbolMap:b,Tags:g,TexConstants:M,TexError:O,TexParser:x,ams:{AmsConfiguration:S,AmsItems:E,AmsMethods:C},autoload:{AutoloadConfiguration:_},base:{BaseConfiguration:T,BaseItems:w,BaseMethods:A},config_macros:{ConfigMacrosConfiguration:k},newcommand:{NewcommandConfiguration:I,NewcommandItems:L,NewcommandMethods:N,NewcommandUtil:P},noundefined:{NoUndefinedConfiguration:B},require:{RequireConfiguration:R}}}}})},function(t,e,r){"use strict";Object.defineProperty(e,"__esModule",{value:!0});var n=r(9),i=r(15),o=r(35),a=r(33),s=r(0);new n.RegExpMap("letter",a.default.variable,/[a-z]/i),new n.RegExpMap("digit",a.default.digit,/[0-9.,]/),new n.RegExpMap("command",a.default.controlSequence,/^\\/),new n.MacroMap("special",{"{":"Open","}":"Close","~":"Tilde","^":"Superscript",_:"Subscript"," ":"Space","\t":"Space","\r":"Space","\n":"Space","'":"Prime","%":"Comment","&":"Entry","#":"Hash","\xa0":"Space","\u2019":"Prime"},o.default),new 
n.CharacterMap("mathchar0mi",a.default.mathchar0mi,{alpha:"\u03b1",beta:"\u03b2",gamma:"\u03b3",delta:"\u03b4",epsilon:"\u03f5",zeta:"\u03b6",eta:"\u03b7",theta:"\u03b8",iota:"\u03b9",kappa:"\u03ba",lambda:"\u03bb",mu:"\u03bc",nu:"\u03bd",xi:"\u03be",omicron:"\u03bf",pi:"\u03c0",rho:"\u03c1",sigma:"\u03c3",tau:"\u03c4",upsilon:"\u03c5",phi:"\u03d5",chi:"\u03c7",psi:"\u03c8",omega:"\u03c9",varepsilon:"\u03b5",vartheta:"\u03d1",varpi:"\u03d6",varrho:"\u03f1",varsigma:"\u03c2",varphi:"\u03c6",S:["\xa7",{mathvariant:i.TexConstant.Variant.NORMAL}],aleph:["\u2135",{mathvariant:i.TexConstant.Variant.NORMAL}],hbar:["\u210f",{variantForm:!0}],imath:"\u0131",jmath:"\u0237",ell:"\u2113",wp:["\u2118",{mathvariant:i.TexConstant.Variant.NORMAL}],Re:["\u211c",{mathvariant:i.TexConstant.Variant.NORMAL}],Im:["\u2111",{mathvariant:i.TexConstant.Variant.NORMAL}],partial:["\u2202",{mathvariant:i.TexConstant.Variant.NORMAL}],infty:["\u221e",{mathvariant:i.TexConstant.Variant.NORMAL}],prime:["\u2032",{mathvariant:i.TexConstant.Variant.NORMAL,variantForm:!0}],emptyset:["\u2205",{mathvariant:i.TexConstant.Variant.NORMAL}],nabla:["\u2207",{mathvariant:i.TexConstant.Variant.NORMAL}],top:["\u22a4",{mathvariant:i.TexConstant.Variant.NORMAL}],bot:["\u22a5",{mathvariant:i.TexConstant.Variant.NORMAL}],angle:["\u2220",{mathvariant:i.TexConstant.Variant.NORMAL}],triangle:["\u25b3",{mathvariant:i.TexConstant.Variant.NORMAL}],backslash:["\u2216",{mathvariant:i.TexConstant.Variant.NORMAL,variantForm:!0}],forall:["\u2200",{mathvariant:i.TexConstant.Variant.NORMAL}],exists:["\u2203",{mathvariant:i.TexConstant.Variant.NORMAL}],neg:["\xac",{mathvariant:i.TexConstant.Variant.NORMAL}],lnot:["\xac",{mathvariant:i.TexConstant.Variant.NORMAL}],flat:["\u266d",{mathvariant:i.TexConstant.Variant.NORMAL}],natural:["\u266e",{mathvariant:i.TexConstant.Variant.NORMAL}],sharp:["\u266f",{mathvariant:i.TexConstant.Variant.NORMAL}],clubsuit:["\u2663",{mathvariant:i.TexConstant.Variant.NORMAL}],diamondsuit:["\u2662",{mathvariant:i.TexConstant.Variant.NORMAL}],heartsuit:["\u2661",{mathvariant:i.TexConstant.Variant.NORMAL}],spadesuit:["\u2660",{mathvariant:i.TexConstant.Variant.NORMAL}]}),new 
n.CharacterMap("mathchar0mo",a.default.mathchar0mo,{surd:"\u221a",coprod:["\u2210",{texClass:s.TEXCLASS.OP,movesupsub:!0}],bigvee:["\u22c1",{texClass:s.TEXCLASS.OP,movesupsub:!0}],bigwedge:["\u22c0",{texClass:s.TEXCLASS.OP,movesupsub:!0}],biguplus:["\u2a04",{texClass:s.TEXCLASS.OP,movesupsub:!0}],bigcap:["\u22c2",{texClass:s.TEXCLASS.OP,movesupsub:!0}],bigcup:["\u22c3",{texClass:s.TEXCLASS.OP,movesupsub:!0}],int:["\u222b",{texClass:s.TEXCLASS.OP}],intop:["\u222b",{texClass:s.TEXCLASS.OP,movesupsub:!0,movablelimits:!0}],iint:["\u222c",{texClass:s.TEXCLASS.OP}],iiint:["\u222d",{texClass:s.TEXCLASS.OP}],prod:["\u220f",{texClass:s.TEXCLASS.OP,movesupsub:!0}],sum:["\u2211",{texClass:s.TEXCLASS.OP,movesupsub:!0}],bigotimes:["\u2a02",{texClass:s.TEXCLASS.OP,movesupsub:!0}],bigoplus:["\u2a01",{texClass:s.TEXCLASS.OP,movesupsub:!0}],bigodot:["\u2a00",{texClass:s.TEXCLASS.OP,movesupsub:!0}],oint:["\u222e",{texClass:s.TEXCLASS.OP}],bigsqcup:["\u2a06",{texClass:s.TEXCLASS.OP,movesupsub:!0}],smallint:["\u222b",{largeop:!1}],triangleleft:"\u25c3",triangleright:"\u25b9",bigtriangleup:"\u25b3",bigtriangledown:"\u25bd",wedge:"\u2227",land:"\u2227",vee:"\u2228",lor:"\u2228",cap:"\u2229",cup:"\u222a",ddagger:"\u2021",dagger:"\u2020",sqcap:"\u2293",sqcup:"\u2294",uplus:"\u228e",amalg:"\u2a3f",diamond:"\u22c4",bullet:"\u2219",wr:"\u2240",div:"\xf7",odot:["\u2299",{largeop:!1}],oslash:["\u2298",{largeop:!1}],otimes:["\u2297",{largeop:!1}],ominus:["\u2296",{largeop:!1}],oplus:["\u2295",{largeop:!1}],mp:"\u2213",pm:"\xb1",circ:"\u2218",bigcirc:"\u25ef",setminus:["\u2216",{variantForm:!0}],cdot:"\u22c5",ast:"\u2217",times:"\xd7",star:"\u22c6",propto:"\u221d",sqsubseteq:"\u2291",sqsupseteq:"\u2292",parallel:"\u2225",mid:"\u2223",dashv:"\u22a3",vdash:"\u22a2",leq:"\u2264",le:"\u2264",geq:"\u2265",ge:"\u2265",lt:"<",gt:">",succ:"\u227b",prec:"\u227a",approx:"\u2248",succeq:"\u2ab0",preceq:"\u2aaf",supset:"\u2283",subset:"\u2282",supseteq:"\u2287",subseteq:"\u2286",in:"\u2208",ni:"\u220b",notin:"\u2209",owns:"\u220b",gg:"\u226b",ll:"\u226a",sim:"\u223c",simeq:"\u2243",perp:"\u22a5",equiv:"\u2261",asymp:"\u224d",smile:"\u2323",frown:"\u2322",ne:"\u2260",neq:"\u2260",cong:"\u2245",doteq:"\u2250",bowtie:"\u22c8",models:"\u22a8",notChar:"\u29f8",Leftrightarrow:"\u21d4",Leftarrow:"\u21d0",Rightarrow:"\u21d2",leftrightarrow:"\u2194",leftarrow:"\u2190",gets:"\u2190",rightarrow:"\u2192",to:"\u2192",mapsto:"\u21a6",leftharpoonup:"\u21bc",leftharpoondown:"\u21bd",rightharpoonup:"\u21c0",rightharpoondown:"\u21c1",nearrow:"\u2197",searrow:"\u2198",nwarrow:"\u2196",swarrow:"\u2199",rightleftharpoons:"\u21cc",hookrightarrow:"\u21aa",hookleftarrow:"\u21a9",longleftarrow:"\u27f5",Longleftarrow:"\u27f8",longrightarrow:"\u27f6",Longrightarrow:"\u27f9",Longleftrightarrow:"\u27fa",longleftrightarrow:"\u27f7",longmapsto:"\u27fc",ldots:"\u2026",cdots:"\u22ef",vdots:"\u22ee",ddots:"\u22f1",dotsc:"\u2026",dotsb:"\u22ef",dotsm:"\u22ef",dotsi:"\u22ef",dotso:"\u2026",ldotp:[".",{texClass:s.TEXCLASS.PUNCT}],cdotp:["\u22c5",{texClass:s.TEXCLASS.PUNCT}],colon:[":",{texClass:s.TEXCLASS.PUNCT}]}),new n.CharacterMap("mathchar7",a.default.mathchar7,{Gamma:"\u0393",Delta:"\u0394",Theta:"\u0398",Lambda:"\u039b",Xi:"\u039e",Pi:"\u03a0",Sigma:"\u03a3",Upsilon:"\u03a5",Phi:"\u03a6",Psi:"\u03a8",Omega:"\u03a9",_:"_","#":"#",$:"$","%":"%","&":"&",And:"&"}),new 
n.DelimiterMap("delimiter",a.default.delimiter,{"(":"(",")":")","[":"[","]":"]","<":"\u27e8",">":"\u27e9","\\lt":"\u27e8","\\gt":"\u27e9","/":"/","|":["|",{texClass:s.TEXCLASS.ORD}],".":"","\\\\":"\\","\\lmoustache":"\u23b0","\\rmoustache":"\u23b1","\\lgroup":"\u27ee","\\rgroup":"\u27ef","\\arrowvert":"\u23d0","\\Arrowvert":"\u2016","\\bracevert":"\u23aa","\\Vert":["\u2225",{texClass:s.TEXCLASS.ORD}],"\\|":["\u2225",{texClass:s.TEXCLASS.ORD}],"\\vert":["|",{texClass:s.TEXCLASS.ORD}],"\\uparrow":"\u2191","\\downarrow":"\u2193","\\updownarrow":"\u2195","\\Uparrow":"\u21d1","\\Downarrow":"\u21d3","\\Updownarrow":"\u21d5","\\backslash":"\\","\\rangle":"\u27e9","\\langle":"\u27e8","\\rbrace":"}","\\lbrace":"{","\\}":"}","\\{":"{","\\rceil":"\u2309","\\lceil":"\u2308","\\rfloor":"\u230b","\\lfloor":"\u230a","\\lbrack":"[","\\rbrack":"]"}),new n.CommandMap("macros",{displaystyle:["SetStyle","D",!0,0],textstyle:["SetStyle","T",!1,0],scriptstyle:["SetStyle","S",!1,1],scriptscriptstyle:["SetStyle","SS",!1,2],rm:["SetFont",i.TexConstant.Variant.NORMAL],mit:["SetFont",i.TexConstant.Variant.ITALIC],oldstyle:["SetFont",i.TexConstant.Variant.OLDSTYLE],cal:["SetFont",i.TexConstant.Variant.CALLIGRAPHIC],it:["SetFont","-tex-mathit"],bf:["SetFont",i.TexConstant.Variant.BOLD],bbFont:["SetFont",i.TexConstant.Variant.DOUBLESTRUCK],scr:["SetFont",i.TexConstant.Variant.SCRIPT],frak:["SetFont",i.TexConstant.Variant.FRAKTUR],sf:["SetFont",i.TexConstant.Variant.SANSSERIF],tt:["SetFont",i.TexConstant.Variant.MONOSPACE],tiny:["SetSize",.5],Tiny:["SetSize",.6],scriptsize:["SetSize",.7],small:["SetSize",.85],normalsize:["SetSize",1],large:["SetSize",1.2],Large:["SetSize",1.44],LARGE:["SetSize",1.73],huge:["SetSize",2.07],Huge:["SetSize",2.49],arcsin:["NamedFn"],arccos:["NamedFn"],arctan:["NamedFn"],arg:["NamedFn"],cos:["NamedFn"],cosh:["NamedFn"],cot:["NamedFn"],coth:["NamedFn"],csc:["NamedFn"],deg:["NamedFn"],det:"NamedOp",dim:["NamedFn"],exp:["NamedFn"],gcd:"NamedOp",hom:["NamedFn"],inf:"NamedOp",ker:["NamedFn"],lg:["NamedFn"],lim:"NamedOp",liminf:["NamedOp","lim inf"],limsup:["NamedOp","lim 
sup"],ln:["NamedFn"],log:["NamedFn"],max:"NamedOp",min:"NamedOp",Pr:"NamedOp",sec:["NamedFn"],sin:["NamedFn"],sinh:["NamedFn"],sup:"NamedOp",tan:["NamedFn"],tanh:["NamedFn"],limits:["Limits",1],nolimits:["Limits",0],overline:["UnderOver","00AF",null,1],underline:["UnderOver","005F"],overbrace:["UnderOver","23DE",1],underbrace:["UnderOver","23DF",1],overparen:["UnderOver","23DC"],underparen:["UnderOver","23DD"],overrightarrow:["UnderOver","2192"],underrightarrow:["UnderOver","2192"],overleftarrow:["UnderOver","2190"],underleftarrow:["UnderOver","2190"],overleftrightarrow:["UnderOver","2194"],underleftrightarrow:["UnderOver","2194"],overset:"Overset",underset:"Underset",stackrel:["Macro","\\mathrel{\\mathop{#2}\\limits^{#1}}",2],over:"Over",overwithdelims:"Over",atop:"Over",atopwithdelims:"Over",above:"Over",abovewithdelims:"Over",brace:["Over","{","}"],brack:["Over","[","]"],choose:["Over","(",")"],frac:"Frac",sqrt:"Sqrt",root:"Root",uproot:["MoveRoot","upRoot"],leftroot:["MoveRoot","leftRoot"],left:"LeftRight",right:"LeftRight",middle:"Middle",llap:"Lap",rlap:"Lap",raise:"RaiseLower",lower:"RaiseLower",moveleft:"MoveLeftRight",moveright:"MoveLeftRight",",":["Spacer",i.TexConstant.Length.THINMATHSPACE],":":["Spacer",i.TexConstant.Length.MEDIUMMATHSPACE],">":["Spacer",i.TexConstant.Length.MEDIUMMATHSPACE],";":["Spacer",i.TexConstant.Length.THICKMATHSPACE],"!":["Spacer",i.TexConstant.Length.NEGATIVETHINMATHSPACE],enspace:["Spacer",".5em"],quad:["Spacer","1em"],qquad:["Spacer","2em"],thinspace:["Spacer",i.TexConstant.Length.THINMATHSPACE],negthinspace:["Spacer",i.TexConstant.Length.NEGATIVETHINMATHSPACE],hskip:"Hskip",hspace:"Hskip",kern:"Hskip",mskip:"Hskip",mspace:"Hskip",mkern:"Hskip",rule:"rule",Rule:["Rule"],Space:["Rule","blank"],big:["MakeBig",s.TEXCLASS.ORD,.85],Big:["MakeBig",s.TEXCLASS.ORD,1.15],bigg:["MakeBig",s.TEXCLASS.ORD,1.45],Bigg:["MakeBig",s.TEXCLASS.ORD,1.75],bigl:["MakeBig",s.TEXCLASS.OPEN,.85],Bigl:["MakeBig",s.TEXCLASS.OPEN,1.15],biggl:["MakeBig",s.TEXCLASS.OPEN,1.45],Biggl:["MakeBig",s.TEXCLASS.OPEN,1.75],bigr:["MakeBig",s.TEXCLASS.CLOSE,.85],Bigr:["MakeBig",s.TEXCLASS.CLOSE,1.15],biggr:["MakeBig",s.TEXCLASS.CLOSE,1.45],Biggr:["MakeBig",s.TEXCLASS.CLOSE,1.75],bigm:["MakeBig",s.TEXCLASS.REL,.85],Bigm:["MakeBig",s.TEXCLASS.REL,1.15],biggm:["MakeBig",s.TEXCLASS.REL,1.45],Biggm:["MakeBig",s.TEXCLASS.REL,1.75],mathord:["TeXAtom",s.TEXCLASS.ORD],mathop:["TeXAtom",s.TEXCLASS.OP],mathopen:["TeXAtom",s.TEXCLASS.OPEN],mathclose:["TeXAtom",s.TEXCLASS.CLOSE],mathbin:["TeXAtom",s.TEXCLASS.BIN],mathrel:["TeXAtom",s.TEXCLASS.REL],mathpunct:["TeXAtom",s.TEXCLASS.PUNCT],mathinner:["TeXAtom",s.TEXCLASS.INNER],vcenter:["TeXAtom",s.TEXCLASS.VCENTER],buildrel:"BuildRel",hbox:["HBox",0],text:"HBox",mbox:["HBox",0],fbox:"FBox",strut:"Strut",mathstrut:["Macro","\\vphantom{(}"],phantom:"Phantom",vphantom:["Phantom",1,0],hphantom:["Phantom",0,1],smash:"Smash",acute:["Accent","00B4"],grave:["Accent","0060"],ddot:["Accent","00A8"],tilde:["Accent","007E"],bar:["Accent","00AF"],breve:["Accent","02D8"],check:["Accent","02C7"],hat:["Accent","005E"],vec:["Accent","2192"],dot:["Accent","02D9"],widetilde:["Accent","007E",1],widehat:["Accent","005E",1],matrix:"Matrix",array:"Matrix",pmatrix:["Matrix","(",")"],cases:["Matrix","{","","left left",null,".1em",null,!0],eqalign:["Matrix",null,null,"right 
left",i.TexConstant.Length.THICKMATHSPACE,".5em","D"],displaylines:["Matrix",null,null,"center",null,".5em","D"],cr:"Cr","\\":"CrLaTeX",newline:"Cr",hline:["HLine","solid"],hdashline:["HLine","dashed"],eqalignno:["Matrix",null,null,"right left",i.TexConstant.Length.THICKMATHSPACE,".5em","D",null,"right"],leqalignno:["Matrix",null,null,"right left",i.TexConstant.Length.THICKMATHSPACE,".5em","D",null,"left"],hfill:"HFill",hfil:"HFill",hfilll:"HFill",bmod:["Macro",'\\mmlToken{mo}[lspace="thickmathspace" rspace="thickmathspace"]{mod}'],pmod:["Macro","\\pod{\\mmlToken{mi}{mod}\\kern 6mu #1}",1],mod:["Macro","\\mathchoice{\\kern18mu}{\\kern12mu}{\\kern12mu}{\\kern12mu}\\mmlToken{mi}{mod}\\,\\,#1",1],pod:["Macro","\\mathchoice{\\kern18mu}{\\kern8mu}{\\kern8mu}{\\kern8mu}(#1)",1],iff:["Macro","\\;\\Longleftrightarrow\\;"],skew:["Macro","{{#2{#3\\mkern#1mu}\\mkern-#1mu}{}}",3],mathcal:["Macro","{\\cal #1}",1],mathscr:["Macro","{\\scr #1}",1],mathrm:["Macro","{\\rm #1}",1],mathbf:["Macro","{\\bf #1}",1],mathbb:["Macro","{\\bbFont #1}",1],Bbb:["Macro","{\\bbFont #1}",1],mathit:["Macro","{\\it #1}",1],mathfrak:["Macro","{\\frak #1}",1],mathsf:["Macro","{\\sf #1}",1],mathtt:["Macro","{\\tt #1}",1],textrm:["Macro","\\mathord{\\rm\\text{#1}}",1],textit:["Macro","\\mathord{\\it\\text{#1}}",1],textbf:["Macro","\\mathord{\\bf\\text{#1}}",1],textsf:["Macro","\\mathord{\\sf\\text{#1}}",1],texttt:["Macro","\\mathord{\\tt\\text{#1}}",1],pmb:["Macro","\\rlap{#1}\\kern1px{#1}",1],TeX:["Macro","T\\kern-.14em\\lower.5ex{E}\\kern-.115em X"],LaTeX:["Macro","L\\kern-.325em\\raise.21em{\\scriptstyle{A}}\\kern-.17em\\TeX"]," ":["Macro","\\text{ }"],not:"Not",dots:"Dots",space:"Tilde","\xa0":"Tilde",begin:"BeginEnd",end:"BeginEnd",label:"HandleLabel",ref:"HandleRef",nonumber:"HandleNoTag",mathchoice:"MathChoice",mmlToken:"MmlToken"},o.default);new n.EnvironmentMap("environment",a.default.environment,{array:["AlignedArray"],equation:["Equation",null,!0],"equation*":["Equation",null,!1],eqnarray:["EqnArray",null,!0,!0,"rcl","0 "+i.TexConstant.Length.THICKMATHSPACE,".5em"]},o.default);new n.CharacterMap("not_remap",null,{"\u2190":"\u219a","\u2192":"\u219b","\u2194":"\u21ae","\u21d0":"\u21cd","\u21d2":"\u21cf","\u21d4":"\u21ce","\u2208":"\u2209","\u220b":"\u220c","\u2223":"\u2224","\u2225":"\u2226","\u223c":"\u2241","~":"\u2241","\u2243":"\u2244","\u2245":"\u2247","\u2248":"\u2249","\u224d":"\u226d","=":"\u2260","\u2261":"\u2262","<":"\u226e",">":"\u226f","\u2264":"\u2270","\u2265":"\u2271","\u2272":"\u2274","\u2273":"\u2275","\u2276":"\u2278","\u2277":"\u2279","\u227a":"\u2280","\u227b":"\u2281","\u2282":"\u2284","\u2283":"\u2285","\u2286":"\u2288","\u2287":"\u2289","\u22a2":"\u22ac","\u22a8":"\u22ad","\u22a9":"\u22ae","\u22ab":"\u22af","\u227c":"\u22e0","\u227d":"\u22e1","\u2291":"\u22e2","\u2292":"\u22e3","\u22b2":"\u22ea","\u22b3":"\u22eb","\u22b4":"\u22ec","\u22b5":"\u22ed","\u2203":"\u2204"})},function(t,e,r){"use 
strict";Object.defineProperty(e,"__esModule",{value:!0}),r(12).add({Pcy:"\u041f",Poincareplane:"\u210c",Pr:"\u2abb",Prime:"\u2033",Proportion:"\u2237",par:"\u2225",para:"\xb6",parallel:"\u2225",parsim:"\u2af3",parsl:"\u2afd",part:"\u2202",pcy:"\u043f",percnt:"%",permil:"\u2030",perp:"\u22a5",pertenk:"\u2031",phmmat:"\u2133",phone:"\u260e",pitchfork:"\u22d4",planck:"\u210f",planckh:"\u210e",plankv:"\u210f",plus:"+",plusacir:"\u2a23",plusb:"\u229e",pluscir:"\u2a22",plusdo:"\u2214",plusdu:"\u2a25",pluse:"\u2a72",plusmn:"\xb1",plussim:"\u2a26",plustwo:"\u2a27",pm:"\xb1",pointint:"\u2a15",pound:"\xa3",pr:"\u227a",prE:"\u2ab3",prcue:"\u227c",pre:"\u2aaf",prec:"\u227a",precapprox:"\u2ab7",preccurlyeq:"\u227c",preceq:"\u2aaf",precsim:"\u227e",primes:"\u2119",prnE:"\u2ab5",prnap:"\u2ab9",prnsim:"\u22e8",prod:"\u220f",profalar:"\u232e",profline:"\u2312",profsurf:"\u2313",prop:"\u221d",propto:"\u221d",prsim:"\u227e",prurel:"\u22b0",puncsp:"\u2008"},"p")},function(t,e,r){"use strict";Object.defineProperty(e,"__esModule",{value:!0}),r(12).add({RBarr:"\u2910",REG:"\xae",Racute:"\u0154",Rang:"\u27eb",Rarrtl:"\u2916",Rcaron:"\u0158",Rcedil:"\u0156",Rcy:"\u0420",ReverseElement:"\u220b",ReverseUpEquilibrium:"\u296f",Rho:"\u03a1",RightArrowBar:"\u21e5",RightDoubleBracket:"\u27e7",RightDownTeeVector:"\u295d",RightDownVectorBar:"\u2955",RightTeeVector:"\u295b",RightTriangleBar:"\u29d0",RightUpDownVector:"\u294f",RightUpTeeVector:"\u295c",RightUpVectorBar:"\u2954",RightVectorBar:"\u2953",RoundImplies:"\u2970",RuleDelayed:"\u29f4",rAarr:"\u21db",rArr:"\u21d2",rAtail:"\u291c",rBarr:"\u290f",rHar:"\u2964",race:"\u223d\u0331",racute:"\u0155",radic:"\u221a",raemptyv:"\u29b3",rang:"\u27e9",rangd:"\u2992",range:"\u29a5",rangle:"\u27e9",raquo:"\xbb",rarr:"\u2192",rarrap:"\u2975",rarrb:"\u21e5",rarrbfs:"\u2920",rarrc:"\u2933",rarrfs:"\u291e",rarrhk:"\u21aa",rarrlp:"\u21ac",rarrpl:"\u2945",rarrsim:"\u2974",rarrw:"\u219d",ratail:"\u291a",ratio:"\u2236",rationals:"\u211a",rbarr:"\u290d",rbbrk:"\u2773",rbrke:"\u298c",rbrksld:"\u298e",rbrkslu:"\u2990",rcaron:"\u0159",rcedil:"\u0157",rceil:"\u2309",rcub:"}",rcy:"\u0440",rdca:"\u2937",rdldhar:"\u2969",rdquo:"\u201d",rdquor:"\u201d",rdsh:"\u21b3",real:"\u211c",realine:"\u211b",realpart:"\u211c",reals:"\u211d",rect:"\u25ad",reg:"\xae",rfisht:"\u297d",rfloor:"\u230b",rhard:"\u21c1",rharu:"\u21c0",rharul:"\u296c",rightarrow:"\u2192",rightarrowtail:"\u21a3",rightharpoondown:"\u21c1",rightharpoonup:"\u21c0",rightleftarrows:"\u21c4",rightleftharpoons:"\u21cc",rightsquigarrow:"\u219d",risingdotseq:"\u2253",rlarr:"\u21c4",rlhar:"\u21cc",rlm:"\u200f",rmoustache:"\u23b1",rnmid:"\u2aee",roang:"\u27ed",roarr:"\u21fe",robrk:"\u27e7",ropar:"\u2986",roplus:"\u2a2e",rotimes:"\u2a35",rpar:")",rpargt:"\u2994",rppolint:"\u2a12",rrarr:"\u21c9",rsaquo:"\u203a",rsh:"\u21b1",rsqb:"]",rsquo:"\u2019",rsquor:"\u2019",rthree:"\u22cc",rtrie:"\u22b5",rtrif:"\u25b8",rtriltri:"\u29ce",ruluhar:"\u2968",rx:"\u211e"},"r")},function(t,e,r){"use strict";var n,i,o=this&&this.__extends||(n=function(t,e){return(n=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(t,e){t.__proto__=e}||function(t,e){for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r])})(t,e)},function(t,e){function r(){this.constructor=t}n(t,e),t.prototype=null===e?Object.create(e):(r.prototype=e.prototype,new r)});Object.defineProperty(e,"__esModule",{value:!0});var a=r(11),s=r(114),c=r(27);r(243);var l,u=(l=c.AbstractTags,o(h,l),h);function h(){return 
null!==l&&l.apply(this,arguments)||this}e.AmsTags=u;e.AmsConfiguration=a.Configuration.create("ams",{handler:{delimiter:["AMSsymbols-delimiter","AMSmath-delimiter"],macro:["AMSsymbols-mathchar0mi","AMSsymbols-mathchar0m0","AMSsymbols-delimiter","AMSsymbols-macros","AMSmath-mathchar0mo","AMSmath-macros","AMSmath-delimiter"],environment:["AMSmath-environment"]},items:(i={},i[s.MultlineItem.prototype.kind]=s.MultlineItem,i),tags:{ams:u},init:function(t){t.append(a.Configuration.extension())}})},function(t,e,r){"use strict";Object.defineProperty(e,"__esModule",{value:!0});function n(t){for(var e=[],r=0,n=t.length;r=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var o,a=r(11),s=r(9),T=r(22),w=r(116),b=r(18),c=r(3),l=(o=s.CommandMap,i(u,o),u.prototype.remove=function(t){this.map.delete(t)},u);function u(){return null!==o&&o.apply(this,arguments)||this}function A(t,e,r,n){var i,o,a,s;if(b.Package.packages.has(t.options.require.prefix+r)){var c=t.options.autoload[r],l=C(2===c.length&&Array.isArray(c[0])?c:[c,[]],2),u=l[0],h=l[1];try{for(var f=_(u),p=f.next();!p.done;p=f.next()){var d=p.value;k.remove(d)}}catch(t){i={error:t}}finally{try{p&&!p.done&&(o=f.return)&&o.call(f)}finally{if(i)throw i.error}}try{for(var m=_(h),y=m.next();!y.done;y=m.next()){var v=y.value;I.remove(v)}}catch(t){a={error:t}}finally{try{y&&!y.done&&(s=m.return)&&s.call(m)}finally{if(a)throw a.error}}t.i-=e.length+(n?0:7)}w.RequireLoad(t,r)}var k=new(e.AutoloadCommandMap=l)("autoload-macros",{},{}),I=new l("autoload-environments",{},{});e.AutoloadConfiguration=a.Configuration.create("autoload",{handler:{macro:["autoload-macros"],environment:["autoload-environments"]},options:{autoload:c.expandable({action:["toggle","mathtip","texttip"],amsCd:[[],["CD"]],bbox:["bbox"],boldsymbol:["boldsymbol"],braket:["bra","ket","braket","set","Bra","Ket","Braket","Set","ketbra","Ketbra"],cancel:["cancel","bcancel","xcancel","cancelto"],color:["color","definecolor","textcolor","colorbox","fcolorbox"],enclose:["enclose"],extpfeil:["xtwoheadrightarrow","xtwoheadleftarrow","xmapsto","xlongequal","xtofrom","Newextarrow"],html:["href","class","style","cssId"],mhchem:["ce","pu"],newcommand:["newcommand","renewcommand","newenvironment","renewenvironment","def","let"],unicode:["unicode"],verb:["verb"]})},config:function(t,e){var r,n,i,o,a,s,c=e.parseOptions,l=c.handlers.get("macro"),u=c.handlers.get("environment"),h=c.options.autoload;try{for(var f=_(Object.keys(h)),p=f.next();!p.done;p=f.next()){var d=p.value,m=h[d],y=C(2===m.length&&Array.isArray(m[0])?m:[m,[]],2),v=y[0],b=y[1];try{for(var g=(i=void 0,_(v)),M=g.next();!M.done;M=g.next()){var O=M.value;l.lookup(O)&&"color"!==O||k.add(O,new T.Macro(O,A,[d,!0]))}}catch(t){i={error:t}}finally{try{M&&!M.done&&(o=g.return)&&o.call(g)}finally{if(i)throw i.error}}try{for(var x=(a=void 0,_(b)),S=x.next();!S.done;S=x.next()){var E=S.value;u.lookup(E)||I.add(E,new T.Macro(E,A,[d,!1]))}}catch(t){a={error:t}}finally{try{S&&!S.done&&(s=x.return)&&s.call(x)}finally{if(a)throw a.error}}}}catch(t){r={error:t}}finally{try{p&&!p.done&&(n=f.return)&&n.call(f)}finally{if(r)throw r.error}}c.options.require.jax||w.RequireConfiguration.config(t,e)},configPriority:10,init:function(t){t.options.require||c.defaultOptions(t.options,w.RequireConfiguration.options)},priority:10})},function(t,e,r){"use strict";var u=this&&this.__values||function(t){var e="function"==typeof 
Symbol&&Symbol.iterator,r=e&&t[e],n=0;if(r)return r.call(t);if(t&&"number"==typeof t.length)return{next:function(){return t&&n>=t.length&&(t=void 0),{value:t&&t[n++],done:!t}}};throw new TypeError(e?"Object is not iterable.":"Symbol.iterator is not defined.")};Object.defineProperty(e,"__esModule",{value:!0});var n=r(11),i=r(3),o=r(9),h=r(22),f=r(72);var p=new o.CommandMap("configMacros",{},{});e.ConfigMacrosConfiguration=n.Configuration.create("configMacros",{handler:{macro:["configMacros"]},config:function(t,e){var r,n,i=t.options.macros;try{for(var o=u(Object.keys(i)),a=o.next();!a.done;a=o.next()){var s=a.value,c="string"==typeof i[s]?[i[s]]:i[s],l=Array.isArray(c[2])?new h.Macro(s,f.default.MacroWithTemplate,c.slice(0,2).concat(c[2])):new h.Macro(s,f.default.Macro,c);p.add(s,l)}}catch(t){r={error:t}}finally{try{a&&!a.done&&(n=o.return)&&n.call(o)}finally{if(r)throw r.error}}},options:{macros:i.expandable({})}})},function(t,e,r){"use strict";var n;Object.defineProperty(e,"__esModule",{value:!0});var i=r(11),o=r(118),a=r(8);r(247);e.NewcommandConfiguration=i.Configuration.create("newcommand",{handler:{macro:["Newcommand-macros"]},items:(n={},n[o.BeginEnvItem.prototype.kind]=o.BeginEnvItem,n),options:{maxMacros:1e3},init:function(t){t.handler.macro.indexOf(a.ExtensionMaps.NEW_COMMAND)<0&&t.append(i.Configuration.extension())}})},function(t,e,r){"use strict";Object.defineProperty(e,"__esModule",{value:!0});var n=r(72);new(r(9).CommandMap)("Newcommand-macros",{newcommand:"NewCommand",renewcommand:"NewCommand",newenvironment:"NewEnvironment",renewenvironment:"NewEnvironment",def:"MacroDef",let:"Let"},n.default)},function(t,e,r){"use strict";Object.defineProperty(e,"__esModule",{value:!0});var n=r(11);e.NoUndefinedConfiguration=n.Configuration.create("noundefined",{fallback:{macro:function(t,e){var r=t.create("text","\\"+e);t.Push(t.create("node","mtext",[],{mathcolor:"red"},r))}}})},function(t,e,r){"use strict";Object.defineProperty(e,"__esModule",{value:!0}),e.registerTeX=function(t){if(MathJax.startup){MathJax.startup.registerConstructor("tex",n.TeX),MathJax.startup.useInput("tex"),MathJax.config.tex||(MathJax.config.tex={});var e=MathJax.config.tex.packages;MathJax.config.tex.packages=t,e&&(0,i.insert)(MathJax.config.tex,{packages:e})}};var n=r(105),i=r(3)},function(t,e,r){"use strict";r(251);var n=r(119);MathJax.startup&&(MathJax.startup.registerConstructor("mml",n.MathML),MathJax.startup.useInput("mml"))},function(t,e,r){"use strict";var n=r(5),i=s(r(119)),o=s(r(120)),a=s(r(121));function s(t){if(t&&t.__esModule)return t;var e={};if(null!=t)for(var r in t)Object.prototype.hasOwnProperty.call(t,r)&&(e[r]=t[r]);return e.default=t,e}(0,n.combineWithMathJax)({_:{input:{mathml_ts:i,mathml:{FindMathML:o,MathMLCompile:a}}}})},function(t,e,r){"use strict";r(253);var n=r(5),i=r(122);MathJax.loader&&(0,n.combineDefaults)(MathJax.config.loader,"output/chtml",{checkReady:function(){return MathJax.loader.load("output/chtml/fonts/tex")}}),MathJax.startup&&(MathJax.startup.registerConstructor("chtml",i.CHTML),MathJax.startup.useOutput("chtml"))},function(t,e,r){"use strict";var 
n=r(5),i=mt(r(122)),o=mt(r(1)),a=mt(r(148)),s=mt(r(2)),c=mt(r(125)),l=mt(r(127)),u=mt(r(172)),h=mt(r(174)),f=mt(r(167)),p=mt(r(130)),d=mt(r(146)),m=mt(r(150)),y=mt(r(152)),v=mt(r(168)),b=mt(r(132)),g=mt(r(160)),M=mt(r(136)),O=mt(r(134)),x=mt(r(144)),S=mt(r(155)),E=mt(r(149)),C=mt(r(138)),_=mt(r(142)),T=mt(r(74)),w=mt(r(37)),A=mt(r(162)),k=mt(r(165)),I=mt(r(140)),L=mt(r(164)),N=mt(r(159)),P=mt(r(157)),B=mt(r(170)),R=mt(r(16)),j=mt(r(124)),H=mt(r(23)),D=mt(r(36)),X=mt(r(123)),F=mt(r(128)),W=mt(r(126)),J=mt(r(173)),q=mt(r(175)),V=mt(r(76)),U=mt(r(131)),z=mt(r(147)),G=mt(r(151)),K=mt(r(153)),Z=mt(r(169)),Y=mt(r(133)),$=mt(r(161)),Q=mt(r(137)),tt=mt(r(135)),et=mt(r(145)),rt=mt(r(156)),nt=mt(r(73)),it=mt(r(139)),ot=mt(r(143)),at=mt(r(154)),st=mt(r(38)),ct=mt(r(163)),lt=mt(r(166)),ut=mt(r(141)),ht=mt(r(75)),ft=mt(r(39)),pt=mt(r(158)),dt=mt(r(171));function mt(t){if(t&&t.__esModule)return t;var e={};if(null!=t)for(var r in t)Object.prototype.hasOwnProperty.call(t,r)&&(e[r]=t[r]);return e.default=t,e}(0,n.combineWithMathJax)({_:{output:{chtml_ts:i,chtml:{FontData:o,Notation:a,Wrapper:s,WrapperFactory:c,Wrappers_ts:l,Wrappers:{TeXAtom:u,TextNode:h,maction:f,math:p,menclose:d,mfenced:m,mfrac:y,mglyph:v,mi:b,mmultiscripts:g,mn:M,mo:O,mpadded:x,mroot:S,mrow:E,ms:C,mspace:_,msqrt:T,msubsup:w,mtable:A,mtd:k,mtext:I,mtr:L,munderover:N,scriptbase:P,semantics:B}},common:{BBox:R,CssStyles:j,FontData:H,Notation:D,OutputJax:X,Wrapper:F,WrapperFactory:W,Wrappers:{TeXAtom:J,TextNode:q,maction:V,math:U,menclose:z,mfenced:G,mfrac:K,mglyph:Z,mi:Y,mmultiscripts:$,mn:Q,mo:tt,mpadded:et,mroot:rt,mrow:nt,ms:it,mspace:ot,msqrt:at,msubsup:st,mtable:ct,mtd:lt,mtext:ut,mtr:ht,munderover:ft,scriptbase:pt,semantics:dt}}}}})},function(t,e,r){"use strict";var n,i=this&&this.__extends||(n=function(t,e){return(n=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(t,e){t.__proto__=e}||function(t,e){for(var r in e)e.hasOwnProperty(r)&&(t[r]=e[r])})(t,e)},function(t,e){function r(){this.constructor=t}n(t,e),t.prototype=null===e?Object.create(e):(r.prototype=e.prototype,new r)}),o=this&&this.__assign||function(){return(o=Object.assign||function(t){for(var e,r=1,n=arguments.length;rdocument.body.offsetWidth-5&&(n=document.body.offsetWidth-l.offsetWidth-5),this.post(n,i)},kt.prototype.registerWidget=function(t){this.widgets.push(t)},kt.prototype.unregisterWidget=function(t){var e=this.widgets.indexOf(t);-1document.body.offsetWidth-5&&(i=Math.max(5,i-n-e.offsetWidth+6)),I.prototype.post.call(this,i,o)}},It.prototype.display=function(){this.baseMenu.getFrame().appendChild(this.getHtml())},It.prototype.setBaseMenu=function(){for(var t=this;(t=t.anchor.getMenu())instanceof It;);this.baseMenu=t},L=It,k.SubMenu=L,function(t){t.close=function(t){var e=t.getMenu();e instanceof N.SubMenu?e.baseMenu.unpost():e.unpost()},t.getActiveElement=function(t){var e=t.getMenu();return(e instanceof N.SubMenu?e.baseMenu:e).getStore().getActive()},t.error=function(t,e){console.log("ContextMenu Error: "+e)},t.counter=function(){return e++};var e=0}((N=vt=vt||{}).MenuUtil||(N.MenuUtil={})),P=vt=vt||{},B=P.AbstractEntry,bt(Lt,B),Object.defineProperty(Lt.prototype,"content",{get:function(){return this._content},set:function(t){this._content=t,this.generateHtml(),this.getMenu()&&this.getMenu().generateHtml()},enumerable:!0,configurable:!0}),Lt.prototype.getId=function(){return 
this.id},Lt.prototype.press=function(){this.disabled||(this.executeAction(),this.executeCallbacks_())},Lt.prototype.executeAction=function(){},Lt.prototype.registerCallback=function(t){-1===this.callbacks.indexOf(t)&&this.callbacks.push(t)},Lt.prototype.unregisterCallback=function(t){var e=this.callbacks.indexOf(t);-1!==e&&this.callbacks.splice(e,1)},Lt.prototype.mousedown=function(t){this.press(),this.stop(t)},Lt.prototype.mouseover=function(t){this.focus(),this.stop(t)},Lt.prototype.mouseout=function(t){this.deactivate(),this.stop(t)},Lt.prototype.generateHtml=function(){B.prototype.generateHtml.call(this);var t=this.getHtml();t.setAttribute("aria-disabled","false"),t.textContent=this.content},Lt.prototype.activate=function(){this.disabled||this.getHtml().classList.add(P.HtmlClasses.MENUACTIVE)},Lt.prototype.deactivate=function(){this.getHtml().classList.remove(P.HtmlClasses.MENUACTIVE)},Lt.prototype.focus=function(){this.getMenu().setFocused(this),B.prototype.focus.call(this),this.activate()},Lt.prototype.unfocus=function(){this.deactivate(),B.prototype.unfocus.call(this)},Lt.prototype.escape=function(t){P.MenuUtil.close(this)},Lt.prototype.up=function(t){this.getMenu().up(t)},Lt.prototype.down=function(t){this.getMenu().down(t)},Lt.prototype.left=function(t){if(this.getMenu()instanceof P.ContextMenu)this.getMenu().left(t);else{var e=this.getMenu();e.setFocused(null),e.getAnchor().focus()}},Lt.prototype.right=function(t){this.getMenu().right(t)},Lt.prototype.space=function(t){this.press()},Lt.prototype.disable=function(){this.disabled=!0;var t=this.getHtml();t.classList.add(P.HtmlClasses.MENUDISABLED),t.setAttribute("aria-disabled","true")},Lt.prototype.enable=function(){this.disabled=!1;var t=this.getHtml();t.classList.remove(P.HtmlClasses.MENUDISABLED),t.removeAttribute("aria-disabled")},Lt.prototype.executeCallbacks_=function(){P.MenuUtil.getActiveElement(this);for(var t=0,e=this.callbacks;t'+this.title+''),r.write("
"+this.generateContent()+"
"),r.write('
'),r.write(""),r.close()):(r.open(),r.write(""+this.title+''),r.write("
"+this.generateContent()+"
"),r.write(""),r.close(),setTimeout(this.resize.bind(this),50))},Jt.prototype.unpost=function(){this.windowList.forEach(function(t){return t.close()}),this.window=null},Jt.prototype.generateContent=function(){return this.content(this.active)},Jt.prototype.resize=function(){var t=this.window.document.body.firstChild,e=this.window.outerHeight-this.window.innerHeight||30,r=this.window.outerWidth-this.window.innerWidth||30;r=Math.max(140,Math.min(Math.floor(.5*this.window.screen.width),t.offsetWidth+r+25)),e=Math.max(40,Math.min(Math.floor(.5*this.window.screen.height),t.offsetHeight+e+25)),this.window.resizeTo(r,e);var n=this.active.getBoundingClientRect();if(n){var i=Math.max(0,Math.min(n.right-Math.floor(r/2),this.window.screen.width-r-20)),o=Math.max(0,Math.min(n.bottom-Math.floor(e/2),this.window.screen.height-e-20));this.window.moveTo(i,o)}this.active=null},Jt.popupSettings={status:"no",toolbar:"no",locationbar:"no",menubar:"no",directories:"no",personalbar:"no",resizable:"yes",scrollbars:"yes",width:400,height:300},yt=Jt,dt.Popup=yt,(vt=vt||{}).TOUCH={START:"touchstart",MOVE:"touchmove",END:"touchend",CANCEL:"touchcancel"}},function(t,e,r){"use strict";var n=r(5),i=l(r(225)),o=l(r(226)),a=l(r(229)),s=l(r(227)),c=l(r(228));function l(t){if(t&&t.__esModule)return t;var e={};if(null!=t)for(var r in t)Object.prototype.hasOwnProperty.call(t,r)&&(e[r]=t[r]);return e.default=t,e}(0,n.combineWithMathJax)({_:{ui:{menu:{MJContextMenu:i,Menu:o,MenuHandler:a,MmlVisitor:s,SelectableInfo:c}}}})},function(t,e,r){"use strict";r(78);var n=r(17),i=r(5),o=r(262);(0,i.combineDefaults)(MathJax.config.loader,"dependencies",o.dependencies),(0,i.combineDefaults)(MathJax.config.loader,"paths",o.paths),(0,i.combineDefaults)(MathJax.config.loader,"provides",o.provides),n.Loader.preLoad("loader"),n.Loader.load.apply(n.Loader,function(t){if(Array.isArray(t)){for(var e=0,r=Array(t.length);e + +# How tests are done? + +First and foremost note that this is a work in progress and that we are doing +our best to have serious testing of the library. + +We can also state our conclusion on testing: we are not and never will be +satisfied with our tests, there are not enough of them, we want more. + +The current system has on average 10000 tests by SIMD extensions. Thanks to +our "Python" approach we can automatically generate tests for all operators +and for all types. This has greatly helped us in finding bugs. But, as you +know, bugs are always there. + +## Why write this? + +Testing the library has been taken seriously since its very beginning. Tests +have gone through several stages: + +- The first one was during the development of the first version of the library. + Tests of operators were done with random numbers as input. Those random + numbers were all powers of 2 to ease the comparisons of basic arithmetic + types. NaNs and infinities were not generated as inputs and operators + behaviors with those inputs were not tested + +- For the second stage random numbers generators have been improved to emit + NaNs and infinities. It allowed us to detect many errors in operators, + mostly in math functions like cos, sin, exp... But we also discovered bugs + in hardware when NaNs and infinities are given to intrinsics. + +- The third stage which the current test system takes into account the + experience we gain with the privous two. 
## ULPs

This chapter is dedicated to mathematical proofs concerning ULPs. Indeed
people use this notion but proofs are hard to find. We give our own
definition of the distance in ULPs, compare it to the usual one and give
pros and cons. We assume the reader is familiar with basic mathematics.

For this entire chapter fix the following:
- an integer $b > 1$ (will be our radix),
- an integer $p > 1$ (will be the number of digits in the mantissa),
- an integer $M > 1$ (will be the minimum exponent allowed for floating
  point numbers).

A floating point number is an element of $\mathbb{R}$ of the form $mb^e$
with $e \geq -M$ and $m \in \mathbb{Z}$. More precisely we define the set of
floating point numbers $F$ to be the union of the following two sets:
- $\{ mb^e \text{ with } e > -M \text{ and } b^{p-1} \leq |m| < b^p \}$, the
  *normal* numbers,
- $\{ mb^{-M} \text{ with } m \in \mathbb{Z} \text{ and } 0 < |m| < b^p \}$,
  the *denormal* or *subnormal* numbers.

The set $F$ can be viewed as a subset of $\mathbb{R}$ with the mapping
$\phi : (m, e) \mapsto mb^e$ and we will make this abuse of notation in what
follows. Usually the sign of the floating point number is separated from $m$
but we include it "inside" $m$ as it does not change the proofs below and
simplifies the notations.

Let $a_i \in F$ for $i = 1,2$ such that $a_i = m_i b^{e_i}$.

**Proposition:** $\phi$ is injective.

**Proof:** Suppose that $a_1 = a_2$, that is $m_1b^{e_1} = m_2b^{e_2}$. If
$a_1$ and $a_2$ are subnormal numbers then $e_1 = e_2 = -M$ and $m_1 = m_2$.
If $a_1$ and $a_2$ are normal numbers suppose that $e_2 > e_1$, then
$|\frac{m_2b^{e_2}}{m_1b^{e_1}}| > b^{e_2 + p - 1 - e_1 - p}
= b^{e_2 - e_1 - 1} \geq b^{1 - 1} = 1$ therefore
$m_2b^{e_2} \neq m_1b^{e_1}$ which is absurd, hence $e_1 = e_2$ and as a
consequence $m_1 = m_2$.

**Definition:** We define the *distance in ULPs between $a_1$ and $a_2$*,
denoted by $U(a_1, a_2)$, to be:
- $|m_1b^{e_1 - e_2} - m_2|$ if $e_1 \geq e_2$,
- $|m_1 - m_2b^{e_2 - e_1}|$ otherwise.

**Example:** Take $a_1 = 123456 \times 10^5$ and $a_2 = 123789 \times 10^5$.
Then as the exponents of $a_1$ and $a_2$ are the same we have
$U(123456 \times 10^5, 123789 \times 10^5) = |123789 - 123456| = 333$.
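Here is a case where the exponents differ, exercising the second branch of
the definition (still with $b = 10$ and $p = 6$):

**Example:** Take $a_1 = 999999 \times 10^5$ and $a_2 = 100001 \times 10^6$.
Here $e_2 > e_1$ so
$U(a_1, a_2) = |m_1 - m_2 b^{e_2 - e_1}| = |999999 - 1000010| = 11$, which
matches the fact that $a_2 - a_1 = 11 \times 10^5$, that is 11 units in the
last place at the scale of $a_1$.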
The following proposition supports the name "units in the last place".

**Proposition:** Let $f = \lfloor \log_b U(a_1, a_2) \rfloor + 1$ and
suppose that $a_1, a_2$ are of the same sign and have the same exponents.
Then either the first $p - f$ digits of $m_1$ and $m_2$ are identical or
their difference is $\pm 1$.

**Proof:** For $i = 1,2$ there exist $q_i \in \mathbb{Z}$ and
$0 \leq r_i < b^f$ such that $m_i = q_i b^f + r_i$. Since
$U(a_1, a_2) = |m_1 - m_2| < b^f$ by definition of $f$, we have
$|q_1 - q_2| \leq \frac{|m_1 - m_2| + |r_1 - r_2|}{b^f}
< \frac{b^f + b^f}{b^f} = 2$,

so that either $q_1 = q_2$ or $q_1 - q_2 = \pm 1$. It is interesting to know
in which cases $q_1 - q_2 = \pm 1$. Suppose that $0 \leq m_1 < m_2$ and that
$q_1 = q_2 + 1$; then $m_1 = q_1 b^f + r_1 \geq q_2 b^f + b^f >
q_2 b^f + r_2 = m_2$ which contradicts the hypothesis, hence $q_1 \leq q_2$.
Finally, when $q_2 = q_1 + 1$,
$r_1 + U(a_1, a_2) = r_1 + (m_2 - m_1) = q_2 b^f + r_2 - q_1 b^f
= r_2 + b^f$ so that:
- $r_1 + U(a_1, a_2) \geq b^f$ and
- $r_1 = r_2 + (b^f - U(a_1, a_2)) > r_2$ since $U(a_1, a_2) < b^f$.

**Example:** Taking back $a_1 = 123456 \times 10^5$ and
$a_2 = 123789 \times 10^5$: as $q_1 = q_2$, the first 3 digits of $m_1$ and
$m_2$ are identical and they differ in their last
$f = \lfloor \log_{10} U(a_1, a_2) \rfloor + 1
= \lfloor \log_{10} 333 \rfloor + 1 = 3$ digits.

**Example:** Now take $a_1 = 899900 \times 10^5$ and
$a_2 = 900100 \times 10^5$. We have $f = 3$ but $q_2 = q_1 + 1$,
$r_1 = 900 > 100 = r_2$ and $r_1 + U(a_1, a_2) = 1100 \geq 1000 = 10^3$.

The propositions above show that our definition of the ULP distance is well
chosen as we have the following results:
- (second proposition) it measures the number of differing digits at the end
  of the mantissa;
- (first proposition) if we write the numbers differently but still in base
  $b$ we only change the number of differing digits in the last places by
  some zeros, the number of zeros being the exponent of $b$ that represents
  the difference in scaling between both representations of the floating
  point numbers.

We now show how to compute it using the IEEE 754 floating point
representation. A floating point number $(m, e) \in F$ is stored in memory
(and registers) as the integer $\pm ((e + M)b^p + |m|)$.

**Proposition:** If $e_2 \geq e_1 + 2$ then $U(a_1, a_2) \geq b^p$.

**Proof:** We have $U(a_1, a_2) = |m_2 b^{e_2 - e_1} - m_1|
\geq ||m_2| b^{e_2 - e_1} - |m_1||$. But $a_2$ is a normal number, otherwise
we would have $e_2 = -M = e_1$, so that $|m_2| \geq b^{p - 1}$ and we have
$|m_2| b^{e_2 - e_1} \geq b^{p - 1 + e_2 - e_1} \geq b^{p + 1} > |m_1|$,
therefore $||m_2| b^{e_2 - e_1} - |m_1|| \geq |m_2|b^2 - |m_1|
> b^{p - 1 + 2} - b^p = b^p(b - 1) \geq b^p$.

The proposition above basically states that if two floating point numbers
are two orders of magnitude apart then they have no digits in common, and
there is a good chance that comparing them is not interesting at all.

The usual definition of the distance in ULPs is roughly given as the number
of floating point numbers between the two considered floating point numbers.
More precisely we will denote it by $V$ and it is defined as follows:
- $V(a_1, a_2) = |(e_1 + M)b^p + |m_1| - (e_2 + M)b^p - |m_2||$ if $a_1$ and
  $a_2$ have the same sign,
- $V(a_1, a_2) = (e_1 + M)b^p + |m_1| + (e_2 + M)b^p + |m_2|$ otherwise.
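In IEEE 754 binary32 ($b = 2$) the stored integer described above is
essentially the bit pattern of the float, so for finite, same-sign values
$V$ can be computed by reinterpreting the bits. A small stand-alone C
illustration (not NSIMD code; `nextafterf` comes from `<math.h>`):

```c
#include <math.h>
#include <stdio.h>
#include <string.h>

/* Read the IEEE 754 bit pattern of a float as an unsigned integer. */
static unsigned int bits_of(float x) {
  unsigned int u;
  memcpy(&u, &x, sizeof u); /* avoid aliasing issues */
  return u;
}

int main(void) {
  float a = 1.0f;
  float b = nextafterf(nextafterf(a, 2.0f), 2.0f); /* two floats above a */
  /* For same-sign finite floats the bit patterns are ordered like the
     values, so their difference counts the floats in between. */
  printf("V(a, b) = %u\n", bits_of(b) - bits_of(a)); /* prints 2 */
  return 0;
}
```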
**Proposition:** If $e_1 = e_2$ and $a_1$, $a_2$ have the same sign then
$U(a_1, a_2) = V(a_1, a_2)$.

**Proof:** We have $V(a_1, a_2) = |(e_1 + M)b^p + m_1 - (e_2 + M)b^p - m_2|$,
but as $e_1 = e_2$, we end up with $V(a_1, a_2) = |m_1 - m_2| = U(a_1, a_2)$.

**Proposition:** $V(a_1, a_2) = 1$ is equivalent to $U(a_1, a_2) = 1$.

**Proof:** The proposition is true if $e_1 = e_2$. Suppose that $e_2 > e_1$.
Note that $a_2$ is a normal number so that $m_2 \geq b^{p - 1}$.

We first suppose that $V(a_1, a_2) = 1$. Then by the definition of $V$,
$a_1$ and $a_2$ have the same sign, otherwise $V(a_1, a_2) \geq 2$, and we
suppose that $a_i \geq 0$. Moreover we have $e_2 = e_1 + 1$, otherwise we
would have $a_1 = m_1b^{e_1} < m_1b^{e_1 + 1} < m_2b^{e_1 + 2} \leq a_2$
with floating point numbers in between. Now we have
$(b^p - 1)b^{e_1} < b^{p - 1}b^{e_1 + 1}$ and let
$(b^p - 1)b^{e_1} \leq mb^e \leq b^{p - 1}b^{e_1 + 1}$.

First note that if $a = mb^e$ is a normal number then $m \geq b^{p - 1}$,
and if $a$ is a subnormal number then $e = -M$, in which case we also have
$e_1 = -M$ and $m \geq b^p - 1 \geq b^{p - 1}$. In any case
$m \geq b^{p - 1}$.

We have $(b^p - 1)/m \, b^{e_1} \leq b^e \leq b^{p - 1}/m \, b^{e_1 + 1}$.
But $1 \leq (b^p - 1) / m$ and $b^{p - 1} / m \leq 1$ so that
$b^{e_1} \leq b^e \leq b^{e_1 + 1}$ and $e = e_1$ or $e = e_1 + 1$. In the
first case $(b^p - 1)b^{e_1} \leq mb^{e_1}$ so that $b^p - 1 \leq m$, but
$m < b^p$ and $m = b^p - 1$. In the second case
$mb^{e_1 + 1} \leq b^{p - 1}b^{e_1 + 1}$ so that $m \leq b^{p - 1}$, but
$b^{p - 1} \leq m$ and $m = b^{p - 1}$. We have proven that two consecutive
elements of $F$ with $e_2 = e_1 + 1$ are necessarily of the form
$a_1 = (b^p - 1)b^{e_1}$ and $a_2 = b^{p - 1}b^{e_1 + 1}$. Now we can
compute $U(a_1, a_2) = |bb^{p - 1} - (b^p - 1)| = 1$.

Conversely, suppose that $U(a_1, a_2) = 1$, then
$|b^{e_2 - e_1}m_2 - m_1| = 1$. Suppose that $b^{e_2 - e_1}m_2 - m_1 = -1$;
then $-1 \geq bb^{p - 1} - b^p = 0$ which is absurd. We then have
$b^{e_2 - e_1}m_2 - m_1 = 1$. Suppose that $e_2 \geq e_1 + 2$; then we would
have $b^{e_2 - e_1}m_2 - m_1 \geq b^2b^{p - 1} - b^p \geq b^p$ which is
absurd, so that $e_2 = e_1 + 1$ and $bm_2 - m_1 = 1$. Suppose that
$m_2 \geq b^{p - 1} + 1$; then $bm_2 - m_1 \geq b^p + b - (b^p - 1) \geq 2$
which is absurd, so that $m_2 = b^{p - 1}$ and as a consequence
$m_1 = b^p - 1$.

If $a_1, a_2 < 0$, then $V(a_1, a_2) = 1$ is equivalent by definition to
$V(-a_1, -a_2) = 1$ which is equivalent to $U(-a_1, -a_2) = 1$ which is by
definition equivalent to $U(a_1, a_2) = 1$.

**Proposition:** Suppose that $e_1 \leq e_2 \leq e_1 + 1$, then
$V \leq U \leq bV$.

**Proof:** The proposition is true if $e_1 = e_2$. Suppose now that
$e_2 = e_1 + 1$. Then we have
$b^p + m_2 - m_1 \geq b^p + b^{p - 1} - b^p \geq 0$
so that $V(a_1, a_2) = b^p + m_2 - m_1 = b^p + m_2(1 - b) + bm_2 - m_1$. But
$b^p + m_2(1 - b) \leq b^p + b^p(1 - b) \leq 0$ and
$bm_2 - m_1 \geq bb^{p - 1} - b^p = 0$ so that $V(a_1, a_2) \leq bm_2 - m_1
= U(a_1, a_2)$. On the other hand we have
$bm_2 - m_1 = b(b^p + m_2 - m_1) + b(m_1 - m_1/b - b^p)$ but
$m_1 - m_1/b - b^p \leq b^p - b^{p - 1}/b - b^p \leq 0$ so that
$U(a_1, a_2) \leq b(b^p + m_2 - m_1) = bV(a_1, a_2)$.

**Remark:** The previous propositions show that the difference between $V$
and $U$ is only visible when the arguments have different exponents and are
not consecutive. Our version of the distance in ULPs puts more weight on
crossing powers of $b$. Also, if $e_2 \geq e_1 + 2$ then we have seen that
$a_1$ and $a_2$ have nothing in common, which is indicated by the fact that
$U, V \geq b^p$.
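As a concrete instance of the proposition on consecutive elements, in base
$b = 10$ with $p = 6$:

**Example:** Take $a_1 = 999999 \times 10^{e_1}$ and
$a_2 = 100000 \times 10^{e_1 + 1}$, the consecutive pair across a power of
$10$. Then $U(a_1, a_2) = |10 \times 100000 - 999999| = 1$, as predicted.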
**Definition:** We now define the *relative distance* $D(a_1, a_2)$ between
$a_1$ and $a_2$ to be $|a_1 - a_2| / \min(|a_1|, |a_2|)$.

**Proposition:** As $U$ is defined in a "mathematical" way compared to $V$,
the relation between $U$ and $D$ is straightforward and we have
$D(a_1, a_2) = U(a_1, a_2) / |m_1|$ when $|a_1| \leq |a_2|$. Moreover we
have $b^{-q}U \leq D \leq b^{1 - q}U$ where $q$ is the integer such that
$b^{q - 1} \leq |m_1| < b^q$. In particular if $a_1$ is a normal number then
$q = p$.

**Proof:** Suppose that $|a_1| < |a_2|$; then we have three cases:
- If $a_2$ is denormal, then so is $a_1$ and $e_1 = -M = e_2$.
- If $a_2$ is normal, then:
  + If $a_1$ is denormal then $e_1 < e_2$.
  + If $a_1$ and $a_2$ are normal numbers then $|m_1/m_2| b^{e_1 - e_2} < 1$
    but $|m_1/m_2| \geq b^{p - 1} / b^p = b^{-1}$ and we have
    $b^{e_1 - e_2 - 1} < 1$ so that $e_1 < e_2 + 1$, that is $e_1 \leq e_2$.

In any case we have $e_1 \leq e_2$; as a consequence we have
$D(a_1, a_2) = |m_1b^{e_1} - m_2b^{e_2}| / \min(|m_1|b^{e_1}, |m_2|b^{e_2})
= |m_1 - m_2b^{e_2 - e_1}| / \min(|m_1|, |m_2|b^{e_2 - e_1})$. Therefore
$D(a_1, a_2) = U(a_1, a_2) / \min(|m_1|, |m_2|b^{e_2 - e_1})$. Now if
$e_1 = e_2$ then $\min(|m_1|, |m_2|) = |m_1|$, but if $e_2 > e_1$ then $a_2$
is a normal number and
$|m_1| < b^p = b \times b^{p - 1} \leq b^{e_2 - e_1} |m_2|$, and again
$\min(|m_1|, |m_2|b^{e_2 - e_1}) = |m_1|$.

Applying $b^{q - 1} \leq |m_1| < b^q$ we get
$b^{-q}U \leq D \leq b^{1 - q}U$. If moreover $a_1$ is a normal number then
by definition $q = p$.

**Remark:** Using the inequality of the previous proposition and taking the
base-$b$ logarithm we get $-q + \log U \leq \log D \leq 1 - q + \log U$ and
then $-q + \lfloor \log U \rfloor \leq \lfloor \log D \rfloor
\leq 1 - q + \lfloor \log U \rfloor$, hence two possibilities:
- $-q + \lfloor \log U \rfloor = \lfloor \log D \rfloor$, in which case
  $\lfloor \log U \rfloor + (-\lfloor \log D \rfloor) = q$;
- $1 - q + \lfloor \log U \rfloor = \lfloor \log D \rfloor$, in which case
  $1 + \lfloor \log U \rfloor + (-\lfloor \log D \rfloor) = q$.

According to an earlier proposition we know that
$f = 1 + \lfloor \log U \rfloor$ can be interpreted as the number of
differing digits in the last places of the mantissa. Write
$\mathcal{D} = - \lfloor \log D \rfloor$; then
$q \leq f + \mathcal{D} \leq q + 1$. The latter inequality shows that
$\mathcal{D}$ can be interpreted as the number of digits which are the same
in the mantissa near the "first" place. Note that for denormal numbers the
"first" places are near the most significant bit. We can conclude this
remark with the interpretation that two floating point numbers have at
least $\mathcal{D} - 1$ digits in common in the first places of the
mantissa and $f$ digits which differ in the last places of the mantissa.
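Putting the pieces together on the running example:

**Example:** For $a_1 = 123456 \times 10^5$ and $a_2 = 123789 \times 10^5$
we have $U(a_1, a_2) = 333$ hence $f = 3$, and
$D(a_1, a_2) = 333 / 123456 \approx 2.7 \times 10^{-3}$ hence
$\mathcal{D} = -\lfloor \log_{10} D \rfloor = 3$. With $q = 6$ the
inequality $q \leq f + \mathcal{D} \leq q + 1$ reads $6 \leq 6 \leq 7$: the
two numbers share at least $\mathcal{D} - 1 = 2$ digits in the first places
(they actually share "123") and differ in their $f = 3$ last digits.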
+
+```c
+/* We suppose that floats are IEEE754 and not NaN nor infinity */
+
+#include <limits.h> /* INT_MAX */
+#include <string.h> /* memcpy */
+
+typedef struct {
+  int mantissa;
+  int exponent;
+} fl_t;
+
+fl_t decompose(float a_) {
+  fl_t ret;
+  unsigned int a;
+  memcpy(&a, &a_, sizeof(float)); /* avoid aliasing */
+  ret.exponent = (int)((a >> 23) & 0xff) - 127;
+  if (ret.exponent == -127) {
+    /* denormal number */
+    ret.mantissa = (int)(a & 0x007fffff);
+  } else {
+    ret.mantissa = (int)((1 << 23) | (a & 0x007fffff));
+  }
+  if (a >> 31) {
+    ret.mantissa = -ret.mantissa;
+  }
+  return ret;
+}
+
+int distance_ulps(float a_, float b_) {
+  fl_t a, b;
+  a = decompose(a_);
+  b = decompose(b_);
+
+  if (a.exponent - b.exponent < -1 || a.exponent - b.exponent > 1) {
+    return INT_MAX;
+  }
+
+  int d;
+  if (a.exponent == b.exponent) {
+    d = a.mantissa - b.mantissa;
+  } else if (a.exponent > b.exponent) {
+    d = 2 * a.mantissa - b.mantissa;
+  } else {
+    d = 2 * b.mantissa - a.mantissa;
+  }
+
+  return d > 0 ? d : -d;
+}
+```
+
+The algorithm for computing $\mathcal{D} - 1$ follows:
+
+```c
+#include <math.h> /* fabsf */
+
+int d(float a_, float b_) {
+  float absa = fabsf(a_);
+  float absb = fabsf(b_);
+
+  /* ensure that |a_| <= |b_| */
+  if (absb < absa) {
+    float tmp = absa;
+    absa = absb;
+    absb = tmp;
+  }
+
+  fl_t a = decompose(absa);
+  /* q = floor of the base-2 log of the smallest mantissa */
+  int q = 0;
+  for (q = 0; q <= 23 && (2 << q) <= a.mantissa; q++);
+
+  /* lu = floor of the base-2 log of the distance in ULPs */
+  int ulps = distance_ulps(a_, b_);
+  int lu;
+  for (lu = 0; lu <= 30 && (2 << lu) <= ulps; lu++);
+
+  return q - (lu + 1) - 1;
+}
+```
+
+## What we really do in the tests
+
+As said above, buggy intrinsics can easily be found. But the bugs appear in
+corner cases, typically involving NaNs and/or infinities. According to the
+philosophy of NSIMD, it is not the job of its standard operators to propose
+a non-buggy alternative to a buggy intrinsic. But we still have the problem
+of testing. A consequence of the philosophy of NSIMD is that we only have to
+test that intrinsics are correctly wrapped. We can reasonably assume that
+testing floating point operators on normal numbers only is more than
+sufficient.
+
+Moreover, an implementation (buggy or not) may have different parameter sets
+that control how floating point arithmetic is done on the various components
+of the chip. A non-exhaustive list includes:
+- rounding modes (which are not controlled by NSIMD as it is a library),
+- FTZ/DAZ (flush to zero / denormals are zero), so that denormal values
+  never appear,
+- FTZ/DAZ on some components (SIMD parts) and not others (scalar parts),
+- non-IEEE behavior (e.g. some NVIDIA GPUs and ARMv7 chips),
+- a mix of the above,
+- a buggy mix of the above.
+
+As a consequence we do not compare floats using the operator `==`, nor do we
+use a weird and buggy formula involving the machine epsilon. Instead we use
+the algorithm above to make sure that the first bits are correct. More
+precisely we use the following algorithm and its variants for float16s and
+doubles, where `ufp` stands for "unit in the first place".
+
+```c
+/* a_ and b_ must be IEEE754 and normal numbers */
+int ufps(float a_, float b_) {
+  unsigned int a, b;
+  memcpy(&a, &a_, 4);
+  memcpy(&b, &b_, 4);
+  int ea = (int)((a >> 23) & 0xff);
+  int eb = (int)((b >> 23) & 0xff);
+  if (ea - eb > 1 || ea - eb < -1) {
+    return 0;
+  }
+  /* mantissas with the implicit leading bit (inputs are normal numbers) */
+  int ma = (int)((1u << 23) | (a & 0x007fffff));
+  int mb = (int)((1u << 23) | (b & 0x007fffff));
+  int d = 0;
+  if (ea == eb) {
+    d = ma - mb;
+  } else if (ea > eb) {
+    d = 2 * ma - mb;
+  } else {
+    d = 2 * mb - ma;
+  }
+  d = (d >= 0 ? d : -d);
+  int i = 0;
+  for (; i < 30 && d >= (1 << i); i++);
+  return 23 - i;
+}
+```
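+
+As a quick sanity check of the interpretation, here is a small driver (a
+sketch only; it assumes the `ufps` function above is in scope and that
+`float` is IEEE754 binary32):
+
+```c
+#include <float.h>
+#include <stdio.h>
+
+int main(void) {
+  /* 1.0f and 1.0f + FLT_EPSILON differ only in the last mantissa bit,
+     so the 22 other fraction bits agree in the first places. */
+  printf("%d\n", ufps(1.0f, 1.0f + FLT_EPSILON)); /* prints 22 */
+  return 0;
+}
+```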
diff --git a/doc/markdown/pack.md b/doc/markdown/pack.md
new file mode 100644
index 00000000..b7a524d8
--- /dev/null
+++ b/doc/markdown/pack.md
@@ -0,0 +1,295 @@
+# NSIMD pack and related functions
+
+The advanced C++ API provides types that represent SIMD registers. These
+types are structs that allow NSIMD to define infix operators. In this page
+NSIMD concepts are reported in the documentation, but you can think of them
+as usual `typename`s.
+
+## The Pack type
+
+```c++
+template <typename T, int N = 1, typename SimdExt = NSIMD_SIMD>
+struct pack {
+  // Typedef to retrieve the native SIMD type
+  typedef typename simd_traits<T, SimdExt>::simd_vector simd_vector;
+
+  // Typedef to retrieve T
+  typedef T value_type;
+
+  // Typedef to retrieve SimdExt
+  typedef SimdExt simd_ext;
+
+  // Static member to retrieve N
+  static const int unroll = N;
+
+  // Ctor that splats `s`, the resulting vector will be [s, s, s, ...]
+  template <typename S> pack(S const &s);
+
+  // Ctor that takes a SIMD vector of native type
+  // ONLY AVAILABLE when N == 1
+  pack(simd_vector v);
+
+  // Retrieve the underlying native SIMD vector
+  // ONLY AVAILABLE when N == 1
+  simd_vector native_register() const;
+
+};
+```
+
+Example:
+
+```c++
+#include <nsimd/nsimd-all.hpp>
+#include <iostream>
+
+int main() {
+  nsimd::pack<float> v(2.0f);
+  std::cout << v << '\n';
+
+  vf32 nv = v.native_register();
+  nv = nsimd::add(nv, nv, f32());
+  std::cout << nsimd::pack<float>(nv) << '\n';
+
+  return 0;
+}
+```
+
+### Infix operators available for packs
+
+- `pack operator+(pack const &, pack const &);`
+- `pack operator*(pack const &, pack const &);`
+- `pack operator-(pack const &, pack const &);`
+- `pack operator/(pack const &, pack const &);`
+- `pack operator-(pack const &);`
+- `pack operator|(pack const &, pack const &);`
+- `pack operator^(pack const &, pack const &);`
+- `pack operator&(pack const &, pack const &);`
+- `pack operator~(pack const &);`
+- `pack operator<<(pack const &, int);` (only available for integers)
+- `pack operator>>(pack const &, int);` (only available for integers)
+
+### Assignment operators available for packs
+
+- `pack &operator+=(pack const &);`
+- `pack &operator-=(pack const &);`
+- `pack &operator*=(pack const &);`
+- `pack &operator/=(pack const &);`
+- `pack &operator|=(pack const &);`
+- `pack &operator&=(pack const &);`
+- `pack &operator^=(pack const &);`
+- `pack &operator<<=(int);`
+- `pack &operator>>=(int);`
+
+### Function aliases
+
+The C++ standard provides functions with different names that do exactly
+the same thing. This is due to backward compatibility with C. Take the
+`fmin` C function as an example. In C this function gives the minimum of two
+doubles only. The C++ standard provides overloads of this function so that
+it can work on floats and long doubles. The aliases provided by NSIMD have
+the same purpose, but they are not provided as operators of their own
+because their real purpose is to write generic code that can work on scalar
+and SIMD vector types. As such they are only relevant for the advanced C++
+API.
+
+- `pack fmin(pack const &, pack const &);`
+- `pack fmax(pack const &, pack const &);`
+- `pack fabs(pack const &);`
+
+They are contained in the `nsimd/cxx_adv_api_aliases.hpp` header and are not
+provided by default, to respect the philosophy of NSIMD, which is to force
+the user to think differently about SIMD code and scalar code. They are
+provided automatically when including `nsimd/nsimd-all.hpp`, as in the
+sketch below.
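+
+For instance, the aliases allow one function template to be instantiated
+with pack types of any base type (and, when scalar overloads are in scope,
+with plain scalars as well). A minimal sketch of this intended usage:
+
+```c++
+#include <nsimd/nsimd-all.hpp>
+#include <iostream>
+
+// Generic code: T can be nsimd::pack<float>, nsimd::pack<double>, ...
+template <typename T> T smallest(T const &a, T const &b) {
+  return nsimd::fmin(a, b);
+}
+
+int main() {
+  nsimd::pack<float> u(1.0f), v(2.0f);
+  std::cout << smallest(u, v) << '\n'; // prints a vector of 1's
+  return 0;
+}
+```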
+
+## The Packl type
+
+```c++
+template <typename T, int N = 1, typename SimdExt = NSIMD_SIMD>
+struct packl {
+  // Typedef to retrieve the native SIMD type
+  typedef typename simd_traits<T, SimdExt>::simd_vectorl simd_vectorl;
+
+  // Typedef to retrieve T
+  typedef T value_type;
+
+  // Typedef to retrieve SimdExt
+  typedef SimdExt simd_ext;
+
+  // Static member to retrieve N
+  static const int unroll = N;
+
+  // Ctor that splats `s`, the resulting vector will be [s, s, s, ...]
+  template <typename S> packl(S const &s);
+
+  // Ctor that takes a SIMD vector of native type
+  // ONLY AVAILABLE when N == 1
+  packl(simd_vectorl v);
+
+  // Retrieve the underlying native SIMD vector
+  // ONLY AVAILABLE when N == 1
+  simd_vectorl native_register() const;
+
+};
+```
+
+Example:
+
+```c++
+#include <nsimd/nsimd-all.hpp>
+#include <iostream>
+
+int main() {
+  nsimd::pack<float> v(2.0f);
+  nsimd::packl<float> mask;
+
+  mask = nsimd::eq(v, v);
+  std::cout << mask << '\n';
+
+  mask = nsimd::neq(v, v);
+  std::cout << mask << '\n';
+
+  return 0;
+}
+```
+
+### Infix operators involving packls
+
+- `packl operator&&(packl const &, packl const &);`
+- `packl operator||(packl const &, packl const &);`
+- `packl operator!(packl const &);`
+- `packl operator==(pack const &, pack const &);`
+- `packl operator!=(pack const &, pack const &);`
+- `packl operator<(pack const &, pack const &);`
+- `packl operator<=(pack const &, pack const &);`
+- `packl operator>(pack const &, pack const &);`
+- `packl operator>=(pack const &, pack const &);`
+
+## Packs for SoA/AoS
+
+Types containing several SIMD vectors are also provided to help the user
+manipulate arrays of structures. When working, let's say, on complex numbers,
+loading them from memory with layout `RIRIRIRIRIRI...` can be done with the
+`load2*` operators that will return 2 SIMD vectors `RRRR` and `IIII` where
+`R` stands for real part and `I` for imaginary part.
+
+Similarly, loading an RGB image stored in memory following the layout
+`RGBRGBRGBRGB...` can be done with `load3*` to get 3 SIMD vectors `RRRR`,
+`GGGG` and `BBBB`. A small example follows the definitions below.
+
+### Packx1
+
+```c++
+template <typename T, int N = 1, typename SimdExt = NSIMD_SIMD>
+NSIMD_STRUCT packx1 {
+
+  // Usual typedefs and static members
+  typedef typename simd_traits<T, SimdExt>::simd_vector simd_vector;
+  typedef T value_type;
+  typedef SimdExt simd_ext;
+  static const int unroll = N;
+  static const int soa_num_packs = 1;
+
+  // Member v0 for reading and writing
+  pack<T, N, SimdExt> v0;
+};
+```
+
+### Packx2
+
+```c++
+template <typename T, int N = 1, typename SimdExt = NSIMD_SIMD>
+NSIMD_STRUCT packx2 {
+
+  // Usual typedefs and static members
+  typedef typename simd_traits<T, SimdExt>::simd_vector simd_vector;
+  typedef T value_type;
+  typedef SimdExt simd_ext;
+  static const int unroll = N;
+  static const int soa_num_packs = 2;
+
+  // Members for reading and writing
+  pack<T, N, SimdExt> v0;
+  pack<T, N, SimdExt> v1;
+};
+```
+
+### Packx3
+
+```c++
+template <typename T, int N = 1, typename SimdExt = NSIMD_SIMD>
+NSIMD_STRUCT packx3 {
+
+  // Usual typedefs and static members
+  typedef typename simd_traits<T, SimdExt>::simd_vector simd_vector;
+  typedef T value_type;
+  typedef SimdExt simd_ext;
+  static const int unroll = N;
+  static const int soa_num_packs = 3;
+
+  // Members for reading and writing
+  pack<T, N, SimdExt> v0;
+  pack<T, N, SimdExt> v1;
+  pack<T, N, SimdExt> v2;
+};
+```
+
+### Packx4
+
+```c++
+template <typename T, int N = 1, typename SimdExt = NSIMD_SIMD>
+NSIMD_STRUCT packx4 {
+
+  // Usual typedefs and static members
+  typedef typename simd_traits<T, SimdExt>::simd_vector simd_vector;
+  typedef T value_type;
+  typedef SimdExt simd_ext;
+  static const int unroll = N;
+  static const int soa_num_packs = 4;
+
+  // Members for reading and writing
+  pack<T, N, SimdExt> v0;
+  pack<T, N, SimdExt> v1;
+  pack<T, N, SimdExt> v2;
+  pack<T, N, SimdExt> v3;
+};
+```
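+
+As a small illustration of these SoA types (a sketch only: the members are
+filled by hand with splat constructors instead of a `load2*` operator), the
+lanes of a `packx2` can be read and written directly:
+
+```c++
+#include <nsimd/nsimd-all.hpp>
+#include <iostream>
+
+int main() {
+  // v0 holds the real parts, v1 the imaginary parts of complex numbers
+  nsimd::packx2<float> ri;
+  ri.v0 = nsimd::pack<float>(1.0f);
+  ri.v1 = nsimd::pack<float>(2.0f);
+  std::cout << ri.v0 << '\n' << ri.v1 << '\n';
+  return 0;
+}
+```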
+### Functions involving packx2, packx3 and packx4
+
+The following functions convert packxs into unrolled packs. The difference
+between the `to_pack` and `to_pack_interleave` families of functions is in
+the way they flatten (or deinterleave) the structure of SIMD vectors.
+
+```c++
+template <typename T, int N, typename SimdExt>
+pack<T, 2 * N, SimdExt> to_pack(const packx2<T, N, SimdExt> &);
+
+template <typename T, int N, typename SimdExt>
+pack<T, 3 * N, SimdExt> to_pack(const packx3<T, N, SimdExt> &);
+
+template <typename T, int N, typename SimdExt>
+pack<T, 4 * N, SimdExt> to_pack(const packx4<T, N, SimdExt> &);
+
+template <typename T, int N, typename SimdExt>
+pack<T, 2 * N, SimdExt> to_pack_interleave(const packx2<T, N, SimdExt> &);
+
+template <typename T, int N, typename SimdExt>
+pack<T, 3 * N, SimdExt> to_pack_interleave(const packx3<T, N, SimdExt> &);
+
+template <typename T, int N, typename SimdExt>
+pack<T, 4 * N, SimdExt> to_pack_interleave(const packx4<T, N, SimdExt> &);
+```
+
+The `to_pack` family of functions performs the following operation:
+
+```
+packx2 = | v0 = [u0 u1 u2] | ---> [u0 u1 u2 w0 w1 w2] = pack
+         | v1 = [w0 w1 w2] |
+```
+
+while the `to_pack_interleave` family of functions does the following:
+
+```
+packx2 = | v0 = [u0 u1 u2] | ---> [u0 w0 u1 w1 u2 w2] = pack
+         | v1 = [w0 w1 w2] |
+```
+
diff --git a/doc/what_is_wrapped.cpp b/doc/what_is_wrapped.cpp
index d9da66c5..a1ffd174 100644
--- a/doc/what_is_wrapped.cpp
+++ b/doc/what_is_wrapped.cpp
@@ -1,6 +1,6 @@
 /*
-Copyright (c) 2020 Agenium Scale
+Copyright (c) 2021 Agenium Scale
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -145,8 +145,7 @@ int is_number(std::string const &s) {
 
 int is_macro(std::string const &s) {
   for (size_t i = 0; i < s.size(); i++) {
-    if (s[i] != '_' && !(s[i] >= 'A' && s[i] <= 'Z') &&
-        !(s[i] >= 'a' && s[i] <= 'z')) {
+    if (s[i] != '_' && !(s[i] >= 'A' && s[i] <= 'Z')) {
       return false;
     }
   }
@@ -180,7 +179,7 @@ void parse_file(std::string const &input_vars, std::string const &simd_ext,
   // now split string on spaces and removes some tokens
   std::vector<std::string> to_be_removed(
       ns2::split("return,signed,unsigned,char,short,int,long,float,double,"
-                 "const,void," +
+                 "const,void,__vector,__bool,bool,vector," +
                  type_names_str + "," + input_vars, ','));
   std::vector<std::string> to_be_removed_by_prefix(ns2::split(
@@ -211,8 +210,6 @@ void parse_file(std::string const &input_vars, std::string const &simd_ext,
   // find func_name
   size_t pos = find(tokens, func_name);
   if (pos == not_found) {
-    std::cerr << "WARNING: cannot find function '" << func_name << "' in '"
-              << filename << "'\n";
     table[op_name][typ] = "NA";
     continue;
   }
@@ -251,15 +248,20 @@ void parse_file(std::string const &input_vars, std::string const &simd_ext,
      if (simd_ext == "neon128" || simd_ext == "aarch64") {
        table[op_name][typ] +=
            "(https://developer.arm.com/architectures/instruction-sets/"
-            "simd-isas/neon/intrinsics?search=" +
-            tokens[i0 + 1] + ")";
+            "intrinsics/" + tokens[i0 + 1] + ")";
      } else if (ns2::startswith(simd_ext, "sve")) {
        table[op_name][typ] +=
            "(https://developer.arm.com/documentation/100987/0000)";
-      } else {
+      } else if (simd_ext == "sse2" || simd_ext == "sse42" ||
+                 simd_ext == "avx" || simd_ext == "avx2" ||
+                 simd_ext == "avx512_knl" || simd_ext == "avx512_skylake") {
        table[op_name][typ] += "(https://software.intel.com/sites/landingpage/"
                               "IntrinsicsGuide/#text=" +
                               tokens[i0 + 1] + ")";
+      } else if (simd_ext == "vsx" || simd_ext == "vmx") {
+        table[op_name][typ] +=
+            "(https://www.ibm.com/docs/en/xl-c-aix/13.1.3?topic=functions-" +
+            ns2::replace(tokens[i0 + 1], "_", "-") + ")";
      }
    } else {
      if (find(std::vector<std::string>(tokens.begin() + i0,
diff --git a/egg/common.py b/egg/common.py
index 5b43f7fb..7c87a789 100644
--- a/egg/common.py
+++ b/egg/common.py
@@ -42,6 +42,7 @@
 import platform
 import string
 import shutil
+import math
 
 # -----------------------------------------------------------------------------
 # print
@@ -92,7 +93,7 @@ def open_utf8(opts, filename):
if opts.simple_license: fout.write('''{} -Copyright (c) 2020 Agenium Scale +Copyright (c) 2021 Agenium Scale {} @@ -100,7 +101,7 @@ def open_utf8(opts, filename): else: fout.write('''{} -Copyright (c) 2020 Agenium Scale +Copyright (c) 2021 Agenium Scale Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -188,8 +189,8 @@ def clang_format(opts, filename, cuda=False): ] ppc_simds = [ - 'power7', - 'power8', + 'vmx', + 'vsx', ] simds = ['cpu'] + x86_simds + arm_simds + ppc_simds @@ -211,8 +212,8 @@ def clang_format(opts, filename, cuda=False): 'sve512': ['cpu', 'aarch64', 'sve512'], 'sve1024': ['cpu', 'aarch64', 'sve1024'], 'sve2048': ['cpu', 'aarch64', 'sve2048'], - 'power7': ['cpu', 'power7'], - 'power8': ['cpu', 'power8'] + 'vmx': ['cpu', 'vmx'], + 'vsx': ['cpu', 'vmx', 'vsx'] } ftypes = ['f64', 'f32', 'f16'] @@ -265,6 +266,11 @@ def logical(typ): if CPU_NBITS != 128: raise ValueError('CPU_NBITS must be 128') +def get_arg(i): + fmtspec = { 'in0': in0, 'in1': in1, 'in2': in2, 'in3': in3, 'in4': in4, + 'in5': in5 } + return '{{in{}}}'.format(i).format(**fmtspec) + def get_args(n): fmtspec = { 'in0': in0, 'in1': in1, 'in2': in2, 'in3': in3, 'in4': in4, 'in5': in5 } @@ -485,6 +491,12 @@ def get_one_type_scalar(param, t): else: raise ValueError('Unknown param: "{}"'.format(param)) +def get_first_discriminating_type(params): + for i in range(len(params)): + if params[i] in ['v', 'l', 'vx2', 'vx3', 'vx4']: + return i + return -1 + # ----------------------------------------------------------------------------- # Formats @@ -548,600 +560,6 @@ def get_modules(opts): opts.modules_list = ret return ret -# ----------------------------------------------------------------------------- -# Ulps - -import json - -def load_ulps_informations(opts): - path = opts.script_dir - filename = os.path.join(path, "ulp.json") - with open(filename) as data_file: - data = json.load(data_file) - - ulps = dict() - for info in data["ulps"]: - type = info["type"] - func = info["func"] - if not func in ulps: - ulps[func] = dict() - ulps[func][type] = info - - return ulps - -def ulps_from_relative_distance_power(p): - return { - 'f16': max(11 - p, 1), - 'f32': max(24 - p, 1), - 'f64': max(53 - p, 1) - } - -# ----------------------------------------------------------------------------- -# Domain stuff - -class MathSet(object): - @property - def mul(self): - return self.mul_ - - @property - def add(self): - return self.add_ - - @property - def variable(self): - return self.variable_ - - @property - def natural(self): - return self.natural_ - - def parse_sum(self, fstring): - npos = fstring.find('n') - zpos = fstring.find('z') - if npos>=0 or zpos >=0: - if npos>=0: - self.natural_ = True - else: - self.natural_ = False - - if fstring.find('-')>=0: - self.mul_ = -self.mul_ - else: - self.add_ = float(fstring) - - def __init__(self, fstring): - npos = fstring.find('n') - zpos = fstring.find('z') - if npos < 0 and zpos < 0: - self.mul_ = 1. - self.add_ = float(fstring) - self.variable_ = False - self.natural_ = False - else: - self.variable_ = True - self.mul_ = 1. - self.add_ = 0. 
- self.natural_ = False - - product = fstring.split('*') - - for part in product: - npos = part.find('n') - zpos = part.find('z') - if npos >= 0 or zpos >= 0: - sumstring = part.split('+') - if len(sumstring) > 1: - self.parse_sum(sumstring[0][1:]) - self.parse_sum(sumstring[1][:-1]) - else: - self.parse_sum(sumstring[0]) - else: - if part == 'pi': - self.mul_ = math.pi * self.mul_ - else: - self.mul_ = float(part) * self.mul_ - - def __str__ (self): - if self.variable: - if self.natural_: - var = 'n' - set_ = 'ℕ' - else: - var = 'z' - set_ = 'ℤ' - - if self.add_ != 0: - add='({}+{:g})'.format(var,self.add_) - else: - add='{}'.format(var) - - if self.mul_ == math.pi: - mul = 'π⋅' - elif abs(self.mul_) == 1: - mul = '' - else: - mul = '{:g}⋅'.format(abs(self.mul_)) - - sign = '' - if self.mul_ < 0: - sign = '-' - - return '{var}∈{set_}:{sign}{mul}{add}'.format(var=var, set_=set_, - sign=sign, add=add, mul=mul) - else: - return '{:g}'.format(self.add) - -# ----------------------------------------------------------------------------- -# Class representing an interval, used to define function domains - -class Interval(object): - - @property - def left(self): - return self.left_ - - @property - def right(self): - return self.right_ - - @property - def open_left(self): - return self.open_left_ - - @property - def open_right(self): - return self.open_right_ - - @property - def removed(self): - return self.removed_ - - def code_for(self, value, typ): - is_fp = typ == 'f32' or typ == 'f64' - if value == float('-Inf'): - if is_fp: - return '-std::numeric_limits<{}>::infinity()'.format(typ) - else: - return 'std::numeric_limits<{}>::min()'.format(typ) - elif value == float('Inf'): - if is_fp: - return 'std::numeric_limits<{}>::infinity()'.format(typ) - else: - return 'std::numeric_limits<{}>::max()'.format(typ) - else: - return value - - def code_left(self, typ): - return self.code_for(self.left_, typ) - - def code_right(self, typ): - return self.code_for(self.right_, typ) - - # Parse the part before the '-' in the interval - # For instance, '(0,1)' in '(0,1)-{0.5}' - def parse_first_part(self,fstring): - real_ = True - fstring = fstring - if fstring[0] == 'R': - self.open_left_ = True - self.open_right_ = True - self.left_ = float('-Inf') - self.right_ = float('Inf') - self.real_ = True - return - if fstring[0] == 'B': - self.open_left_ = False - self.open_right_ = False - self.left_ = 0 - self.right_ = 1 - self.logical_ = True - return - if fstring[0] == 'N': - self.open_left_ = True - self.open_right_ = True - self.left_ = float('0') - self.right_ = float('Inf') - self.natural_ = True - return - if fstring[0] == 'Z': - self.open_left_ = True - self.open_right_ = True - self.left_ = float('-Inf') - self.right_ = float('Inf') - self.natural_ = True - return - elif fstring[0] == '(': - self.open_left_ = True - elif fstring[0] == '[': - self.open_left_ = False - else: - raise ValueError('Error in format string : "{}"'.format(fstring)) - - self.real_ = True - - length = len(fstring) - - if fstring[length-1] == ')': - self.open_right_ = True - elif fstring[length-1] == ']': - self.open_right_ = False - else: - raise ValueError('Error in format string : "{}"'.format(fstring)) - - numbers = fstring[1:length-1].split(',') - - if len(numbers) != 2: - raise ValueError('Error in format string : "{}"'.format(fstring)) - - self.left_ = float(numbers[0]) - self.right_ = float(numbers[1]) - - def parse_second_part(self, fstring): - for removed in fstring.split(','): - self.removed.append(MathSet(removed)) - - - 
def __init__(self, fstring): - self.left_ = -float('Inf') - self.right_ = float('Inf') - self.open_left_ = True - self.open_right_ = True - - self.real_ = False - self.natural_ = False - self.logical_ = False - - self.removed_ = [] - - split = fstring.find('\{') - - if split < 0: - self.parse_first_part(fstring); - else: - first_part = fstring[0:split] - scd_part = fstring[split+2:-1] - - self.parse_first_part(first_part); - if split > 0: - self.parse_second_part(scd_part) - - def __str__(self): - ret = '' - if self.real_: - if self.open_left: - open_left = '(' - else: - open_left = '[' - - if self.open_right: - open_right = ')' - else: - open_right = ']' - - all_r = True - if self.left == -float('inf'): - left = '-∞' - else: - left = '{:g}'.format(self.left) - all_r = False - - if self.right == float('inf'): - right = '+∞' - else: - right = '{:g}'.format(self.right) - all_r = False - - if all_r: - ret = 'ℝ' - else: - ret = '{}{}, {}{}'.format(open_left, left, right, open_right) - elif self.natural_: - if self.left == -float('inf'): - ret = 'ℤ' - else: - ret = 'ℕ' - elif self.logical_: - ret = '𝔹' - else: - raise ValueError ('Trying to print invalid interval') - - if self.removed_: - ret += '∖\\{' - comma = '' - for removed in self.removed_: - ret+=comma+str(removed) - comma = ', ' - ret += '\\}' - - return ret - - def code(self, typ): - left = self.code_left(typ) - right = self.code_right(typ) - if len(self.removed): - excluded = [] - for r in self.removed: - if r.variable: - ## TODO: - pass - else: - excluded.append(r.add) - if len(excluded): - exclude = ' || '.join('r == {}({})'. \ - format(typ, e) for e in excluded) - else: - exclude = 'false' - return ''' - while (true) {{ - {type} r = nsimd::benches::rand<{type}>({min}, {max}); - if ({exclude}) {{ - continue; - }} else {{ - return r; - }} - }} - '''.format(type=typ, exclude=exclude, min=left, max=right) - else: - return 'return nsimd::benches::rand<{type}>({min}, {max});'. \ - format(type=typ, min=left, max=right) - -# ----------------------------------------------------------------------------- -# Class representing a function domain - -class Domain(object): - - def __init__(self, str_list): - self.intervals_ = [] - - # Remove spaces in str_list - str_list = str_list.replace(' ','') - - # 0 dimension - if not str_list: - return - - dimensions_string = str_list.split('x') - - for union_string in dimensions_string: - interval_string = union_string.split('U') - - current = [] - for interval in interval_string: - try: - current.append(Interval(interval)) - except ValueError as v: - raise ValueError( \ - '{}\nEncountered while parsing domain {}'. \ - format(v, str_list)) - - self.intervals_.append(current) - - def __str__(self): - ret = '' - - product = '' - for union in self.intervals_: - u = '' - ret += product - if len(union) > 1 and len(self.intervals_) > 1: - ret += '(' - for interval in union: - ret += '{}{}'.format(u, interval) - u = ' ⋃ ' - if len(union) > 1 and len(self.intervals_) > 1: - ret += ')' - - product = ' × ' - - return '$' + ret + '$' - - @property - def intervals(self): - return self.intervals_ - - @property - def ndims(self): - return len(self.intervals_) - - def code(self, prefix_fun_name, typ): - code = '' - for i, unions in enumerate(self.intervals): - nunions = len(unions) - if nunions == 1: - nested_code = unions[0].code(typ) - else: - cases = [] - for j, union in enumerate(unions): - cases.append('case {n}: {{ {code} }};'. 
\ - format(n=j, code=union.code(typ))) - nested_code = ''' - /* Branch to one of the nested interval (union) */ - switch (rand() % {nunions}) {{ - {cases} - default: - /* SHOULD NEVER HAPPEN! This removes compiler warning! */ - return {type}(); - }} - '''.format(cases='\n'.join(cases), nunions=nunions, type=typ) - code += ''' - {type} {prefix}{n}() {{ - {code} - }} - - '''.format(type=typ, prefix=prefix_fun_name, n=i, code=nested_code) - return code - - def gen_rand(self, typ): - typlen = typ[1:] - ret = '' - - if typ[0] in ('i', 'u'): - #TODO: check that random number is in the function domain - for u, union in enumerate(self.intervals): - ret += \ - '''{typ} rand{u}() {{ - nsimd_nat i, r; - u8 *alias; - {typ} ret; - (void)i; - (void)alias; - (void)r; - '''.format(u=u+1, typ=typ) - - for i, interval in enumerate(union): - if interval.logical_: - ret += 'ret = (u8)(rand()) % 2;' - else: - if not interval.removed: - test='0' - else: - test = '||\n'. \ - join(['ret == {}'.format(removed) \ - for removed in interval.removed]) - - ret += \ - '''do {{ - alias = (u8*)(&ret); - for(i=0, r=rand(); i<(r%{it})+1; ++i) {{ - alias[i] = (u8)(rand() & 0xFF); - }} - for(;i<{it}; ++i) {{ - alias[i] = 0u; - }} - }} while ({test}); - '''.format(test=test, it=int(typlen) // 8) - - ret += 'return ret;}' - elif typ in ftypes: - #TODO: check that random number is in the function domain - for u, union in enumerate(self.intervals): - ret += \ - '''{typ} rand{u}() {{ - nsimd_nat i; - u8 *alias; - {typ} ret; - (void)i; - (void)alias; - '''.format(u=u+1, typ=typ) - - for i, interval in enumerate(union): - if interval.logical_: - if typ == 'f16': - ret += 'ret = nsimd_scalar_reinterpret_f16_u16(' \ - '(u16)(rand() % 2));' - else: - ret += 'ret = ({})(rand()%2);'.format(typ) - else: - ret += \ - '''alias = (u8*)(&ret); - for(i=0; i<{it}; ++i) {{ - alias[i] = (u8)(rand() & 0xFF); - }} - '''.format(it=int(typlen) // 8) - - ret += 'return ret;}' - - return ret - - -# ----------------------------------------------------------------------------- -# Sleef - -sleef_types = [ - 'f32', - 'f64', - ] - -sleef_simds = [ - 'sse2', - 'sse42', - 'avx', - 'fma4', - 'avx2', - ] - -def sleef_support_type(simd, typ): - ## NEON128 only supports 32bit floating points - if simd == 'neon128' and typ == 'f64': - return False - ## No f16 support + No integer supports - return not (typ == 'f16' or typ in itypes or typ in utypes) - -def sleef_name(name, simd, typ, ulp=None): - ## Sleef mangling: - ''' - 1. Function name in math.h - 2. Data type of vector element - 3. Number of elements in a vector - 4. [Accuracy for typical input domain] - 5. Vector extension - ''' - ## Filter - if not sleef_support_type(simd, typ): - return None - ## Craft it - ## 1. - name = 'Sleef_' + name - ## 2. + 3. - types_cpu = { - 'f32': 'f', - 'f64': '', - } - types_128 = { - 'f32': 'f4', - 'f64': 'd2', - } - types_256 = { - 'f32': 'f8', - 'f64': 'd4', - } - types_512 = { - 'f32': 'f16', - 'f64': 'd8', - } - types_unknown = { - 'f32': 'fx', - 'f64': 'dx', - } - name += ({ - 'cpu': types_cpu, - 'sse2': types_128, - 'sse42': types_128, - 'avx': types_256, - 'fma4': types_256, - 'avx2': types_256, - 'avx512_knl': types_512, - 'avx512_skylake': types_512, - 'neon128': types_128, - 'aarch64': types_128, - 'sve': types_unknown, - 'sve128': types_unknown, - 'sve256': types_unknown, - 'sve512': types_unknown, - 'sve1024': types_unknown, - 'sve2048': types_unknown, - 'power7': types_128, - 'power8': types_128 - })[simd][typ] - ## 4. (We cannot really guess that... 
- ## Instead you have to add bench manually) - if ulp is not None: - name += '_u{}'.format(ulp) - ## 5. (Translate or use `simd` directly) - if simd != 'cpu': - ## Careful of the extra _ - if ulp is None: - name += '_' - name += ({ - 'sse42': 'sse4', - 'avx512_knl': 'avx512f', - 'avx512_skylake': 'avx512f', - 'neon128': '', - 'aarch64': 'advsimd', - }).get(simd, simd) - return name - # ----------------------------------------------------------------------------- # Integer limits per type using macros defined in or diff --git a/egg/cuda.py b/egg/cuda.py index 70b33fec..eb216d30 100644 --- a/egg/cuda.py +++ b/egg/cuda.py @@ -109,6 +109,79 @@ def get_impl(operator, totyp, typ): 'typnbits': typ[1:] } + # src operators + if operator.src: + cuda_ops = { + 'sin_u35': 'sin', + 'cos_u35': 'cos', + 'tan_u35': 'tan', + 'asin_u35': 'asin', + 'acos_u35': 'acos', + 'atan_u35': 'atan', + 'atan2_u35': 'atan2', + 'log_u35': 'log', + 'cbrt_u35': 'cbrt', + 'sin_u10': 'sin', + 'cos_u10': 'cos', + 'tan_u10': 'tan', + 'asin_u10': 'asin', + 'acos_u10': 'acos', + 'atan_u10': 'atan', + 'atan2_u10': 'atan2', + 'log_u10': 'log', + 'cbrt_u10': 'cbrt', + 'exp_u10': 'exp', + 'pow_u10': 'pow', + 'sinh_u10': 'sinh', + 'cosh_u10': 'cosh', + 'tanh_u10': 'tanh', + 'sinh_u35': 'sinh', + 'cosh_u35': 'cosh', + 'tanh_u35': 'tanh', + 'asinh_u10': 'asinh', + 'acosh_u10': 'acosh', + 'atanh_u10': 'atanh', + 'exp2_u10': 'exp2', + 'exp2_u35': 'exp2', + 'exp10_u10': 'exp10', + 'exp10_u35': 'exp10', + 'expm1_u10': 'expm1', + 'log10_u10': 'log10', + 'log2_u10': 'log2', + 'log2_u35': 'log2', + 'log1p_u10': 'log1p', + 'sinpi_u05': 'sinpi', + 'cospi_u05': 'cospi', + 'hypot_u05': 'hypot', + 'hypot_u35': 'hypot', + 'remainder': 'remainder', + 'fmod': 'fmod', + 'lgamma_u10': 'lgamma', + 'tgamma_u10': 'tgamma', + 'erf_u10': 'erf', + 'erfc_u15': 'erfc' + } + args = common.get_args(len(operator.params[1:])) + cuda_op = cuda_ops[operator.name] + if typ == 'f16': + # For f16 CUDA offers only a few operator + if cuda_op in ['cos', 'exp', 'exp10', 'exp2', 'log', 'log10', + 'log2', 'sin']: + return '''#if __CUDA_ARCH__ >= 530 + return h{}({}); + #else + return __float2half(gpu_{}(__half2float({}))); + #endif'''.format(cuda_op, args, operator.name, args) + else: + args = ', '.join('__half2float({})'.format(common.get_arg(i)) \ + for i in range(len(operator.params[1:]))) + return 'return __float2half(gpu_{}({}));'. \ + format(operator.name, args) + elif typ == 'f32': + return 'return {}f({});'.format(cuda_op, args) + else: + return 'return {}({});'.format(cuda_op, args) + # bool first, no special treatment for f16's bool_operators = { 'andl': 'return {in0} && {in1};', diff --git a/egg/disabled_plateform_ppc.py b/egg/disabled_plateform_ppc.py deleted file mode 100644 index a148393b..00000000 --- a/egg/disabled_plateform_ppc.py +++ /dev/null @@ -1,1597 +0,0 @@ -# Copyright (c) 2019 Agenium Scale -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -# This file gives the implementation for the Power PC platform. - -import common - -# ----------------------------------------------------------------------------- -# Helpers - -## Returns the 64 bits vector associated to a data type (eg:float32x2 for float32_t) - -fmtspec = {} - -## Returns the power pc type corresponding to the nsimd type -def ppc_vec_type(typ): - if typ == 'u8': - return '__vector unsigned char' - elif typ == 'i8': - return '__vector signed char' - elif typ == 'u16': - return '__vector unsigned short' - elif typ == 'i16': - return '__vector signed short' - elif typ == 'u32': - return '__vector unsigned int' - elif typ == 'i32': - return '__vector signed int' - elif typ == 'f32': - return '__vector float' - else: - raise ValueError('Unavailable type "{}" for ppc'.format(typ)) - -## Returns the logical power pc type corresponding to the nsimd type -def ppc_vec_typel(typ): - if typ[1:] == '8': - return '__vector __bool char' - elif typ == 'f16': - return 'struct {__vector __bool int v0; __vector __bool int v1;}' - elif typ[1:] == '16': - return '__vector __bool short' - elif typ[1:] == '32': - return '__vector __bool int' - else: - raise ValueError('Unknown type "{}"'.format(typ)) - -## Whether or not the half float are emulated -def emulate_fp16(simd_ext): - return True - -## Emulate 64 bits types (for power7) -def emulate_64(op, simd_ext, params): - fmtspec2 = fmtspec.copy() - fmtspec2['op'] = op - fmtspec2['buf_ret_decl'] = 'nsimd_cpu_{v}{typ} buf_ret;'. \ - format(v='v' if params[0] == 'v' else 'vl', **fmtspec) - fmtspec2['buf_decl'] = '\n'.join(['nsimd_cpu_{v}{typ} buf{p};'. \ - format(v='v' if p[1] == 'v' else 'vl', p=p[0], **fmtspec) \ - for p in common.enum(params[1:])]) - fmtspec2['bufs'] = ','.join(['buf{}'.format(i) \ - for i in range(0, len(params) - 1)]) - fmtspec2['ret_decl'] = 'nsimd_{simd_ext}_{v}{typ} ret;'. \ - format(v='v' if params[0] == 'v' else 'vl', - **fmtspec) - if common.CPU_NBITS == 64: - buf_set0 = '\n'.join('buf{i}.v0 = {ini}.v0;'. \ - format(i=i, ini=fmtspec['in{}'.format(i)]) \ - for i in range(0, len(params) - 1)) - buf_set1 = '\n'.join('buf{i}.v0 = {ini}.v1;'. \ - format(i=i, ini=fmtspec['in{}'.format(i)]) \ - for i in range(0, len(params) - 1)) - return '''{buf_ret_decl} - {buf_decl} - {ret_decl} - {buf_set0} - buf_ret = nsimd_{op}_cpu_{typ}({bufs}); - ret.v0 = buf_ret.v0; - {buf_set1} - buf_ret = nsimd_{op}_cpu_{typ}({bufs}); - ret.v1 = buf_ret.v0; - return ret;'''. \ - format(buf_set0=buf_set0, buf_set1=buf_set1, **fmtspec2) - else: - buf_set = '\n'.join('''buf{i}.v0 = {ini}.v0; - buf{i}.v1 = {ini}.v1;'''. 
\ - format(i=i, ini=fmtspec['in{}'.format(i)]) \ - for i in range(0, arity)) - return '''{buf_ret_decl} - {buf_decl} - {ret_decl} - {buf_set} - buf_ret = nsimd_{op}_cpu_{typ}({bufs}); - ret.v0 = buf_ret.v0; - ret.v1 = buf_ret.v1; - return ret;'''.format(buf_set=buf_set, **fmtspec2) - -## Emulate f16 bits types (for power7) -def emulate_16(op, simd_ext, arity, logical_return): - tmpl = ', '.join(['{{in{}}}.v{{{{i}}}}'.format(i).format(**fmtspec) \ - for i in range(0, arity)]) - args1 = tmpl.format(i='0') - args2 = tmpl.format(i='1') - - l='l' if logical_return else '' - - return '''nsimd_{simd_ext}_v{l}f16 ret; - ret.v0 = nsimd_{op}_{simd_ext}_f32({args1}); - ret.v1 = nsimd_{op}_{simd_ext}_f32({args2}); - return ret;'''. \ - format(l=l, op=op, args1=args1, args2=args2, **fmtspec) - - -# ----------------------------------------------------------------------------- -# Implementation of mandatory functions for this module - -def get_simd_exts(): - return ['power7', 'power8'] - -def get_type(simd_ext, typ): - # TODO: power8 - if simd_ext in get_simd_exts(): - if typ == 'f64': - return 'struct {double v0; double v1;}' - elif typ == 'i64': - return 'struct {i64 v0; i64 v1;}' - elif typ == 'u64': - return 'struct {u64 v0; u64 v1;}' - elif typ == 'f16': - return 'struct {__vector float v0; __vector float v1;}' - else: - return ppc_vec_type(typ) - else: - raise ValueError('Unknown SIMD extension "{}"'.format(simd_ext)) - -def get_logical_type(simd_ext, typ): - if typ not in common.types: - raise ValueError('Unknown type "{}"'.format(typ)) - elif typ == 'i64': - return 'struct {u32 v0; u32 v1;}' - elif typ == 'u64': - return 'struct {u32 v0; u32 v1;}' - elif typ == 'f64': - return 'struct {u32 v0; u32 v1;}' - else: - return ppc_vec_typel(typ) - -def get_nb_registers(simd_ext): - if simd_ext in 'power7': - #TODO - return '32' - elif simd_ext == 'power8': - #TODO - return '64' - else: - raise ValueError('Unknown SIMD extension "{}"'.format(simd_ext)) - -def get_SoA_type(simd_ext, typ, deg): - if simd_ext != 'sve': - raise ValueError('Unknown SIMD extension "{}"'.format(simd_ext)) - return '{}x{}_t'.format(sve_typ(typ)[0:-2], deg) - -def has_compatible_SoA_types(simd_ext): - if simd_ext in get_simd_exts(): - return False - else: - raise ValueError('Unknown SIMD extension "{}"'.format(simd_ext)) - -def get_additional_include(func, platform, simd_ext): - ret = '''#include - '''.format(func) - ret +='''#include - '''.format(simd_ext=simd_ext) - - if func == 'neq': - ret +='''#include - #include - '''.format(simd_ext=simd_ext) - - if func in ['loadlu', 'loadla']: - ret += '''#include - #include - #include - #include - '''.format(load='load' + func[5], **fmtspec) - - if func in ['storelu']: - ret += '''#include - #include - '''.format(**fmtspec) - - if func in ['shr', 'shl']: - ret += '''#include - '''.format(**fmtspec) - - if func[:11] == 'reinterpret': - ret += '#include ' - - if func[:4] == 'load': - ret +=''' - #define NSIMD_PERMUTE_MASK_32(a, b, c, d) \ - {(unsigned char)(4 * a), (unsigned char)(4 * a + 1), \ - (unsigned char)(4 * a + 2), (unsigned char)(4 * a + 3), \ - (unsigned char)(4 * b), (unsigned char)(4 * b + 1), \ - (unsigned char)(4 * b + 2), (unsigned char)(4 * b + 3), \ - (unsigned char)(4 * c), (unsigned char)(4 * c + 1), \ - (unsigned char)(4 * c + 2), (unsigned char)(4 * c + 3), \ - (unsigned char)(4 * d), (unsigned char)(4 * d + 1), \ - (unsigned char)(4 * d + 2), (unsigned char)(4 * d + 3)} - - #define NSIMD_PERMUTE_MASK_16(a, b, c, d, e, f, g, h) \ - {(unsigned char)(2 * a + 0), 
(unsigned char)(2 * a + 1), \ - (unsigned char)(2 * b + 0), (unsigned char)(2 * b + 1), \ - (unsigned char)(2 * c + 0), (unsigned char)(2 * c + 1), \ - (unsigned char)(2 * d + 0), (unsigned char)(2 * d + 1), \ - (unsigned char)(2 * e + 0), (unsigned char)(2 * e + 1), \ - (unsigned char)(2 * f + 0), (unsigned char)(2 * f + 1), \ - (unsigned char)(2 * g + 0), (unsigned char)(2 * g + 1), \ - (unsigned char)(2 * h + 0), (unsigned char)(2 * h + 1)} - - #define NSIMD_PERMUTE_MASK_8(a, b, c, d, e, f, g, h, \ - i, j, k, l, m, n, o, p) \ - { (unsigned char)(a), (unsigned char)(b), \ - (unsigned char)(c), (unsigned char)(d), \ - (unsigned char)(e), (unsigned char)(f), \ - (unsigned char)(g), (unsigned char)(h), \ - (unsigned char)(i), (unsigned char)(j), \ - (unsigned char)(k), (unsigned char)(l), \ - (unsigned char)(m), (unsigned char)(n), \ - (unsigned char)(o), (unsigned char)(p) } - ''' - - return ret - -# ----------------------------------------------------------------------------- -# Get SoA types - -def get_soa_typ(simd_ext, typ, deg): - ntyp = get_type(simd_ext, typ) if typ != 'f16' else 'float16x8_t' - return '{}x{}_t'.format(ntyp[:-2], deg) - -# ----------------------------------------------------------------------------- - -## Loads of degree 1, 2, 3 and 4 - -def load1234(simd_ext, typ, deg, aligned): - # Load n for every 64bits types - if typ[1:] == '64': - if deg == 1: - return ''' - nsimd_{simd_ext}_v{typ} ret; - ret.v0 = {in0}[0]; - ret.v1 = {in0}[1]; - return ret; - '''.format(deg=deg, **fmtspec) - else: - return \ - 'nsimd_{simd_ext}_v{typ}x{} ret;\n'.format(deg, **fmtspec) + \ - '\n'.join(['ret.v{i}.v0 = *({in0} + {i});'. \ - format(i=i, **fmtspec) for i in range(0, deg)]) + \ - '\n'.join(['ret.v{i}.v1 = *({in0} + {ipd});'. \ - format(i=i, ipd=i + deg, **fmtspec) \ - for i in range(0, deg)]) + \ - '\nreturn ret;\n' - - # Load n for f16 - if typ == 'f16': - if deg == 1: - return ''' - nsimd_{simd_ext}_vf16 ret; - f32 buf[4]; - buf[0] = nsimd_u16_to_f32(*(u16*){in0}); - buf[1] = nsimd_u16_to_f32(*((u16*){in0} + 1)); - buf[2] = nsimd_u16_to_f32(*((u16*){in0} + 2)); - buf[3] = nsimd_u16_to_f32(*((u16*){in0} + 3)); - ret.v0 = vec_ld(0, buf); - buf[0] = nsimd_u16_to_f32(*((u16*){in0} + 4)); - buf[1] = nsimd_u16_to_f32(*((u16*){in0} + 5)); - buf[2] = nsimd_u16_to_f32(*((u16*){in0} + 6)); - buf[3] = nsimd_u16_to_f32(*((u16*){in0} + 7)); - ret.v1 = vec_ld(0, buf); - return ret;; - '''.format(**fmtspec) - else: - ret = '''nsimd_{simd_ext}_vf16x{deg} ret; - f32 buf[4]; - '''.format(deg=deg, **fmtspec) - - for i in range(0, deg): - for k in range(0, 2): - for j in range (0, 4): - ret += 'buf[{j}] = nsimd_u16_to_f32(*((u16*){in0} + {shift}));\n'. \ - format(j=j, shift= i + k*4*deg + j*deg, **fmtspec) - ret += 'ret.v{i}.v{k} = vec_ld(0, buf);\n\n'.\ - format(i=i, k=k, **fmtspec) - - ret += 'return ret;' - return ret - - # Load 1 for every supported types - if deg == 1: - if aligned: - return 'return vec_ld(0, {in0});'.format(**fmtspec) - else: - return 'return *(({ppc_typ}*) {in0});'.\ - format(ppc_typ=ppc_vec_type(typ), **fmtspec) - - # Code to load aligned/unaligned vectors - if aligned: - load = 'nsimd_{simd_ext}_v{typ}x{deg} ret;\n'.format(deg=deg, **fmtspec) + \ - '\n'.join(['nsimd_{simd_ext}_v{typ} in{i} = vec_ld({i} * 16, {in0});'. \ - format(i=i, **fmtspec) \ - for i in range (0, deg)]) - else: - load = 'nsimd_{simd_ext}_v{typ}x{deg} ret;\n'.format(deg=deg, **fmtspec) + \ - '\n'.join(['nsimd_{simd_ext}_v{typ} in{i} = *(({ppc_typ}*) ({in0} + {i}*{vec_size}));'. 
\ - format(vec_size=str(128//int(typ[1:])), - ppc_typ=ppc_vec_type(typ), i=i, **fmtspec) \ - for i in range (0, deg)]) - - # Load 2 for every supported types - if deg == 2: - if typ[1:] == '32': - return ''' - {load} - - nsimd_{simd_ext}_v{typ} tmp0 = vec_mergeh(in0, in1); - nsimd_{simd_ext}_v{typ} tmp1 = vec_mergel(in0, in1); - - ret.v0 = vec_mergeh(tmp0, tmp1); - ret.v1 = vec_mergel(tmp0, tmp1); - - return ret; - '''.format(load=load, **fmtspec) - elif typ[1:] == '16': - return ''' - {load} - - nsimd_{simd_ext}_v{typ} tmp0 = vec_mergeh(in0, in1); - nsimd_{simd_ext}_v{typ} tmp1 = vec_mergel(in0, in1); - - in0 = vec_mergeh(tmp0, tmp1); - in1 = vec_mergel(tmp0, tmp1); - - ret.v0 = vec_mergeh(in0, in1); - ret.v1 = vec_mergel(in0, in1); - - return ret; - '''.format(load=load, **fmtspec) - elif typ[1:] == '8': - return ''' - __vector unsigned char perm1 = NSIMD_PERMUTE_MASK_8( - 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - __vector unsigned char perm2 = NSIMD_PERMUTE_MASK_8( - 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); - - {load} - - ret.v0 = vec_perm(in0, in1, perm1); - ret.v1 = vec_perm(in0, in1, perm2); - - return ret; - '''.format(load=load, **fmtspec) - - # Load 3 for every supported types - elif deg == 3: - if typ[1:] == '32': - return ''' - __vector char perm1 = NSIMD_PERMUTE_MASK_32(0, 3, 6, 0); - - {load} - - nsimd_{simd_ext}_v{typ} tmp0 = vec_perm(in0, in1, perm1); - nsimd_{simd_ext}_v{typ} tmp1 = vec_perm(in1, in2, perm1); - nsimd_{simd_ext}_v{typ} tmp2 = vec_perm(in2, in0, perm1); - - __vector char perm2 = NSIMD_PERMUTE_MASK_32(0, 1, 2, 5); - __vector char perm3 = NSIMD_PERMUTE_MASK_32(5, 0, 1, 2); - __vector char perm4 = NSIMD_PERMUTE_MASK_32(2, 5, 0, 1); - - - ret.v0 = vec_perm(tmp0, in2, perm2); - ret.v1 = vec_perm(tmp1, in0, perm3); - ret.v2 = vec_perm(tmp2, in1, perm4); - - return ret; - '''.format(load=load, **fmtspec) - elif typ[1:] == '16': - return ''' - {load} - - __vector char permRAB = NSIMD_PERMUTE_MASK_16(0, 3, 6, 9, 12, 15, 0, 0); - __vector char permRDC = NSIMD_PERMUTE_MASK_16(0, 1, 2, 3, 4, 5, 10, 13); - - nsimd_{simd_ext}_v{typ} tmp0 = vec_perm(in0, in1, permRAB); - ret.v0 = vec_perm(tmp0, in2, permRDC); - - __vector char permGAB = NSIMD_PERMUTE_MASK_16(1, 4, 7, 10, 13, 0, 0, 0); - __vector char permGEC = NSIMD_PERMUTE_MASK_16(0, 1, 2, 3, 4, 8, 11, 14); - - nsimd_{simd_ext}_v{typ} tmp1 = vec_perm(in0, in1, permGAB); - ret.v1 = vec_perm(tmp1, in2, permGEC); - - __vector char permBAB = NSIMD_PERMUTE_MASK_16(2, 5, 8, 11, 14, 0, 0, 0); - __vector char permBFC = NSIMD_PERMUTE_MASK_16(0, 1, 2, 3, 4, 9, 12, 15); - - nsimd_{simd_ext}_v{typ} tmp2 = vec_perm(in0, in1, permBAB); - ret.v2 = vec_perm(tmp2, in2, permBFC); - - return ret; - - '''.format(load=load, **fmtspec) - elif typ[1:] == '8': - return ''' - {load} - - __vector char permRAB = NSIMD_PERMUTE_MASK_8(0, 3, 6, 9, 12, 15, - 18, 21, 24, 27, 30, 0, 0, 0, 0, 0); - __vector char permRDC = NSIMD_PERMUTE_MASK_8(0, 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10, 17, 20, 23, 26, 29); - - nsimd_{simd_ext}_v{typ} tmp0 = vec_perm(in0, in1, permRAB); - ret.v0 = vec_perm(tmp0, in2, permRDC); - - __vector char permGAB = NSIMD_PERMUTE_MASK_8(1, 4, 7, 10, 13, 16, - 19, 22, 25, 28, 31, 0, 0, 0, 0, 0); - __vector char permGEC = NSIMD_PERMUTE_MASK_8(0, 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10, 18, 21, 24, 27, 30); - - nsimd_{simd_ext}_v{typ} tmp1 = vec_perm(in0, in1, permGAB); - ret.v1 = vec_perm(tmp1, in2, permGEC); - - __vector char permBAB = NSIMD_PERMUTE_MASK_8(2, 5, 8, 11, 14, 17, - 20, 23, 26, 29, 0, 0, 0, 0, 0, 0); 
- __vector char permBFC = NSIMD_PERMUTE_MASK_8(0, 1, 2, 3, 4, 5, 6, - 7, 8, 9, 16, 19, 22, 25, 28, 31); - - nsimd_{simd_ext}_v{typ} tmp2 = vec_perm(in0, in1, permBAB); - ret.v2 = vec_perm(tmp2, in2, permBFC); - - return ret; - '''.format(load=load, **fmtspec) - - # load 4 for every supported types - else: - if typ[1:] == '32': - return ''' - {load} - - nsimd_{simd_ext}_v{typ} tmp0 = vec_mergeh(in0, in2); - nsimd_{simd_ext}_v{typ} tmp1 = vec_mergel(in0, in2); - nsimd_{simd_ext}_v{typ} tmp2 = vec_mergeh(in1, in3); - nsimd_{simd_ext}_v{typ} tmp3 = vec_mergel(in1, in3); - - ret.v0 = vec_mergeh(tmp0, tmp2); - ret.v1 = vec_mergel(tmp0, tmp2); - ret.v2 = vec_mergeh(tmp1, tmp3); - ret.v3 = vec_mergel(tmp1, tmp3); - - return ret; - '''.format (load=load, **fmtspec) - elif typ[1:] == '16': - return ''' - {load} - - ret.v0 = vec_mergeh(in0, in2); - ret.v1 = vec_mergel(in0, in2); - ret.v2 = vec_mergeh(in1, in3); - ret.v3 = vec_mergel(in1, in3); - - nsimd_{simd_ext}_v{typ} tmp0 = vec_mergeh(ret.v0, ret.v2); - nsimd_{simd_ext}_v{typ} tmp1 = vec_mergel(ret.v0, ret.v2); - nsimd_{simd_ext}_v{typ} tmp2 = vec_mergeh(ret.v1, ret.v3); - nsimd_{simd_ext}_v{typ} tmp3 = vec_mergel(ret.v1, ret.v3); - - ret.v0 = vec_mergeh(tmp0, tmp2); - ret.v1 = vec_mergel(tmp0, tmp2); - ret.v2 = vec_mergeh(tmp1, tmp3); - ret.v3 = vec_mergel(tmp1, tmp3); - - return ret; - '''.format(load=load, **fmtspec) - - elif typ[1:] == '8': - return ''' - {load} - - nsimd_{simd_ext}_v{typ} tmp0 = vec_mergeh(in0, in2); - nsimd_{simd_ext}_v{typ} tmp1 = vec_mergel(in0, in2); - nsimd_{simd_ext}_v{typ} tmp2 = vec_mergeh(in1, in3); - nsimd_{simd_ext}_v{typ} tmp3 = vec_mergel(in1, in3); - - ret.v0 = vec_mergeh(tmp0, tmp2); - ret.v1 = vec_mergel(tmp0, tmp2); - ret.v2 = vec_mergeh(tmp1, tmp3); - ret.v3 = vec_mergel(tmp1, tmp3); - - tmp0 = vec_mergeh(ret.v0, ret.v2); - tmp1 = vec_mergel(ret.v0, ret.v2); - tmp2 = vec_mergeh(ret.v1, ret.v3); - tmp3 = vec_mergel(ret.v1, ret.v3); - - ret.v0 = vec_mergeh(tmp0, tmp2); - ret.v1 = vec_mergel(tmp0, tmp2); - ret.v2 = vec_mergeh(tmp1, tmp3); - ret.v3 = vec_mergel(tmp1, tmp3); - - return ret; - '''.format(load=load, **fmtspec) - -## Stores of degree 1, 2, 3 and 4 - -def store1234(simd_ext, typ, deg, aligned): - - # store n for 64 bits types - if typ[1:] == '64': - return \ - '\n'.join(['*({{in0}} + {}) = {{in{}}}.v0;'. \ - format(i - 1, i).format(**fmtspec) \ - for i in range(1, deg + 1)]) + '\n' + \ - '\n'.join(['*({{in0}} + {}) = {{in{}}}.v1;'. \ - format(i + deg - 1, i).format(**fmtspec) \ - for i in range(1, deg + 1)]) - - if typ == 'f16': - if deg == 1: - return \ - '''f32 buf[4]; - vec_st({in1}.v0, 0, buf); - *((u16*){in0} ) = nsimd_f32_to_u16(buf[0]); - *((u16*){in0} + 1) = nsimd_f32_to_u16(buf[1]); - *((u16*){in0} + 2) = nsimd_f32_to_u16(buf[2]); - *((u16*){in0} + 3) = nsimd_f32_to_u16(buf[3]); - vec_st({in1}.v1, 0, buf); - *((u16*){in0} + 4) = nsimd_f32_to_u16(buf[0]); - *((u16*){in0} + 5) = nsimd_f32_to_u16(buf[1]); - *((u16*){in0} + 6) = nsimd_f32_to_u16(buf[2]); - *((u16*){in0} + 7) = nsimd_f32_to_u16(buf[3]); - '''.format(**fmtspec) - else: - ret = 'f32 buf[4];\n' - - for i in range(0, deg): - for k in range(0, 2): - ret += 'vec_st({{in{i}}}.v{k}, 0, buf);\n'.\ - format(i=i+1, k=k).format(**fmtspec) - for j in range (0, 4): - ret += '*((u16*){in0} + {shift}) = nsimd_f32_to_u16(buf[{j}]);\n'. 
\ - format(j=j, shift=i + k*4*deg + j*deg, **fmtspec) - - return ret - - # store 1 for every supported types - if deg == 1: - if aligned: - return 'vec_st({in1}, 0, {in0});'.format(**fmtspec) - else: - return '*(({ppc_typ}*) {in0}) = {in1};'.\ - format(ppc_typ=ppc_vec_type(typ), **fmtspec) - - # Code to store aligned/unaligned vectors - if aligned: - store = '\n'.join(['vec_st(ret{i}, 16*{i}, {in0});'. \ - format(i=i, **fmtspec) \ - for i in range (0, deg)]) - else: - store = '\n'.join(['*({ppc_typ}*) ({in0} + {i}*{vec_size}) = ret{i};'. \ - format(vec_size=str(128//int(typ[1:])), - ppc_typ=ppc_vec_type(typ), i=i, **fmtspec) \ - for i in range (0, deg)]) - - - # store 2 for every supported types - if deg == 2: - return ''' - nsimd_{simd_ext}_v{typ} ret0 = vec_mergeh({in1}, {in2}); - nsimd_{simd_ext}_v{typ} ret1 = vec_mergel({in1}, {in2}); - - {store} - '''.format(store=store, **fmtspec) - - # store 3 for every supported types - elif deg == 3: - if typ[1:] == '32': - return ''' - __vector char perm1 = NSIMD_PERMUTE_MASK_32(0, 2, 4, 6); - __vector char perm2 = NSIMD_PERMUTE_MASK_32(0, 2, 5, 7); - __vector char perm3 = NSIMD_PERMUTE_MASK_32(1, 3, 5, 7); - - nsimd_{simd_ext}_v{typ} tmp0 = vec_perm({in1}, {in2}, perm1); - nsimd_{simd_ext}_v{typ} tmp1 = vec_perm({in3}, {in1}, perm2); - nsimd_{simd_ext}_v{typ} tmp2 = vec_perm({in2}, {in3}, perm3); - - nsimd_{simd_ext}_v{typ} ret0 = vec_perm(tmp0, tmp1, perm1); - nsimd_{simd_ext}_v{typ} ret1 = vec_perm(tmp2, tmp0, perm2); - nsimd_{simd_ext}_v{typ} ret2 = vec_perm(tmp1, tmp2, perm3); - - {store} - '''.format(store=store, **fmtspec) - elif typ[1:] == '16': - return ''' - __vector char permARG = NSIMD_PERMUTE_MASK_16(0, 8, 0, 1, 9, 0, 2, 10); - __vector char permAXB = NSIMD_PERMUTE_MASK_16(0, 1, 8, 3, 4, 9, 6, 7); - - nsimd_{simd_ext}_v{typ} tmp0 = vec_perm({in1}, {in2}, permARG); - nsimd_{simd_ext}_v{typ} ret0 = vec_perm(tmp0, {in3}, permAXB); - - __vector char permBRG = NSIMD_PERMUTE_MASK_16(0, 3, 11, 0, 4, 12, 0, 5); - __vector char permBYB = NSIMD_PERMUTE_MASK_16(10, 1, 2, 11, 4, 5, 12, 7); - - nsimd_{simd_ext}_v{typ} tmp1 = vec_perm({in1}, {in2}, permBRG); - nsimd_{simd_ext}_v{typ} ret1 = vec_perm(tmp1, {in3}, permBYB); - - __vector char permCRG = NSIMD_PERMUTE_MASK_16(13, 0, 6, 14, 0, 7, 15, 0); - __vector char permCZB = NSIMD_PERMUTE_MASK_16(0, 13, 2, 3, 14, 5, 6, 15); - - nsimd_{simd_ext}_v{typ} tmp2 = vec_perm({in1}, {in2}, permCRG); - nsimd_{simd_ext}_v{typ} ret2 = vec_perm(tmp2, {in3}, permCZB); - - {store} - '''.format(store=store, **fmtspec) - elif typ[1:] == '8': - return ''' - __vector char mARG = NSIMD_PERMUTE_MASK_8(0, 16, 0, 1, 17, 0, - 2, 18, 0, 3, 19, 0, 4, 20, 0, 5); - __vector char mAXB = NSIMD_PERMUTE_MASK_8(0, 1, 16, 3, 4, 17, - 6, 7, 18, 9, 10, 19, 12, 13, 20, 15); - - nsimd_{simd_ext}_v{typ} tmp0 = vec_perm({in1}, {in2}, mARG); - nsimd_{simd_ext}_v{typ} ret0 = vec_perm(tmp0, {in3}, mAXB); - - __vector char mBRG = NSIMD_PERMUTE_MASK_8(21, 0, 6, 22, 0, 7, - 23, 0, 8, 24, 0, 9, 25, 0, 10, 26); - __vector char mBYB = NSIMD_PERMUTE_MASK_8(0, 21, 2, 3, 22, 5, - 6, 23, 8, 9, 24, 11, 12, 25, 14, 15); - - nsimd_{simd_ext}_v{typ} tmp1 = vec_perm({in1}, {in2}, mBRG); - nsimd_{simd_ext}_v{typ} ret1 = vec_perm(tmp1, {in3}, mBYB); - - __vector char mCRG = NSIMD_PERMUTE_MASK_8(0, 11, 27, 0, 12, 28, - 0, 13, 29, 0, 14, 30, 0, 15, 31, 0); - __vector char mCZB = NSIMD_PERMUTE_MASK_8(26, 1, 2, 27, 4, 5, - 28, 7, 8, 29, 10, 11, 30, 13, 14, 31); - - nsimd_{simd_ext}_v{typ} tmp2 = vec_perm({in1}, {in2}, mCRG); - nsimd_{simd_ext}_v{typ} ret2 = 
vec_perm(tmp2, {in3}, mCZB); - - {store} - '''.format(store=store, **fmtspec) - - # store 4 for every supported types - else: - if typ[1:] == '32': - return ''' - nsimd_{simd_ext}_v{typ} tmp0 = vec_mergeh({in1}, {in3}); - nsimd_{simd_ext}_v{typ} tmp1 = vec_mergel({in1}, {in3}); - nsimd_{simd_ext}_v{typ} tmp2 = vec_mergeh({in2}, {in4}); - nsimd_{simd_ext}_v{typ} tmp3 = vec_mergel({in2}, {in4}); - - nsimd_{simd_ext}_v{typ} ret0 = vec_mergeh(tmp0, tmp2); - nsimd_{simd_ext}_v{typ} ret1 = vec_mergel(tmp0, tmp2); - nsimd_{simd_ext}_v{typ} ret2 = vec_mergeh(tmp1, tmp3); - nsimd_{simd_ext}_v{typ} ret3 = vec_mergel(tmp1, tmp3); - - {store} - '''.format(store=store, **fmtspec) - elif typ[1:] == '16': - return ''' - nsimd_{simd_ext}_v{typ} tmp0 = vec_mergeh({in1}, {in3}); - nsimd_{simd_ext}_v{typ} tmp1 = vec_mergel({in1}, {in3}); - nsimd_{simd_ext}_v{typ} tmp2 = vec_mergeh({in2}, {in4}); - nsimd_{simd_ext}_v{typ} tmp3 = vec_mergel({in2}, {in4}); - - nsimd_{simd_ext}_v{typ} ret0 = vec_mergeh(tmp0, tmp2); - nsimd_{simd_ext}_v{typ} ret1 = vec_mergel(tmp0, tmp2); - nsimd_{simd_ext}_v{typ} ret2 = vec_mergeh(tmp1, tmp3); - nsimd_{simd_ext}_v{typ} ret3 = vec_mergel(tmp1, tmp3); - - {store} - '''.format(store=store, **fmtspec) - - elif typ[1:] == '8': - return ''' - nsimd_{simd_ext}_v{typ} tmp0 = vec_mergeh({in1}, {in3}); - nsimd_{simd_ext}_v{typ} tmp1 = vec_mergel({in1}, {in3}); - nsimd_{simd_ext}_v{typ} tmp2 = vec_mergeh({in2}, {in4}); - nsimd_{simd_ext}_v{typ} tmp3 = vec_mergel({in2}, {in4}); - - nsimd_{simd_ext}_v{typ} ret0 = vec_mergeh(tmp0, tmp2); - nsimd_{simd_ext}_v{typ} ret1 = vec_mergel(tmp0, tmp2); - nsimd_{simd_ext}_v{typ} ret2 = vec_mergeh(tmp1, tmp3); - nsimd_{simd_ext}_v{typ} ret3 = vec_mergel(tmp1, tmp3); - - {store} - '''.format(store=store, **fmtspec) - -## Length -def len1(simd_ext, typ): - return 'return (128 / {});'.format(int(typ[1:])) - -## Function for all the operators that take 2 operands and whose Altivec -## function is vec_opname() -def simple_op2(op, simd_ext, typ): - cpuop = {'mul': '*', 'div': '/', 'add': '+', 'sub': '-'} - if simd_ext == 'power7': - if typ in ['f64', 'i64', 'u64'] : - return emulate_64(op, simd_ext, 3 * ['v']) - - if typ == 'f16' : - return emulate_16(op, simd_ext, 2, False) - - return 'return {in0} {op} {in1};'.format(op=cpuop[op], **fmtspec) - -## Binary operators: and, or, xor, andnot - -def bop2(op, simd_ext, typ): - if typ[1:] == '64': - return emulate_64(op, simd_ext, 3 * ['v']) - elif typ == 'f16': - return emulate_16(op, simd_ext, 2, False) - else: - ppcop = {'orb': 'or', 'xorb': 'xor', 'andb': 'and', 'andnotb': 'andc'} - return 'return vec_{op}({in0}, {in1});'. \ - format(op=ppcop[op], **fmtspec) - -## Logical operators: and, or, xor, andnot - -def lop2(op, simd_ext, typ): - if typ[1:] == '64': - return emulate_64(op, simd_ext, 3 * ['l']) - elif typ == 'f16': - return emulate_16(op, simd_ext, 2, True) - else: - ppcop = {'orl': 'or', 'xorl': 'xor', 'andl': 'and', 'andnotl': 'andc'} - return 'return vec_{op}({in0}, {in1});'. 
\ - format(op=ppcop[op], **fmtspec) - -## Binary not - -def notb1(simd_ext, typ): - if typ[1:] == '64': - return emulate_64('notb', simd_ext, 2 * ['v']) - elif typ == 'f16': - return emulate_16('notb', simd_ext, 1, False) - else: - return 'return vec_nor({in0}, {in0});'.format(**fmtspec) - -## Logical not - -def lnot1(simd_ext, typ): - if typ[1:] == '64': - return emulate_64('notl', simd_ext, 2 * ['l']) - elif typ == 'f16': - return emulate_16('notl', simd_ext, 1, True) - else: - return 'return vec_nor({in0}, {in0});'.format(**fmtspec) - -## Square root - -def sqrt1(simd_ext, typ): - if typ[1:] == '64': - return emulate_64('sqrt', simd_ext, 2 * ['v']) - elif typ == 'f16': - return emulate_16('sqrt', simd_ext, 1, False) - else: - return ''' - /* Can't use vec_rsqrte because the precision is too low */ - nat i; - {typ} buf[{size}]; - vec_st({in0}, 0, buf); - nsimd_cpu_v{typ} tmp, rettmp; - for (i=0; i<{size}; ++i) {{ - tmp.v0 = buf[i]; - rettmp = nsimd_sqrt_cpu_{typ}(tmp); - buf[i] = rettmp.v0; - }} - return vec_ld(0, buf); - '''.format(size=128//int(typ[1:]), **fmtspec) - -## Shifts - -def shl_shr(op, simd_ext, typ): - if typ[1:] == '64': - return '''nsimd_{simd_ext}_v{typ} ret; - nsimd_cpu_v{typ} buf0, bufret; - - buf0.v0 = {in0}.v0; - bufret = nsimd_{op}_cpu_{typ}(buf0, {in1}); - ret.v0 = bufret.v0; - - buf0.v0 = {in0}.v1; - bufret = nsimd_{op}_cpu_{typ}(buf0, {in1}); - ret.v1 = bufret.v0; - - return ret;'''. \ - format(op=op, **fmtspec) - elif typ == 'f16': - return '''nsimd_{simd_ext}_v{typ} ret; - ret.v0 = nsimd_{op}_{nsimd_ext}_f32({in0}.v0, {in1}); - ret.v1 = nsimd_{op}_{nsimd_ext}_f32({in0}.v1, {in1}); - return ret;'''. \ - format(op=op, **fmtspec) - else: - ppcop = {'shl': 'sl', 'shr': 'sr'} - return ''' - nsimd_{simd_ext}_vu{type_size} tmp - = nsimd_set1_{simd_ext}_u{type_size}((u{type_size})({in1})); - return vec_{op}({in0}, tmp);'''. \ - format(op=ppcop[op], type_size=typ[1:], **fmtspec) - -# Set1: splat functions -def set1(simd_ext, typ): - if typ[1:] == '64': - return '''nsimd_{simd_ext}_v{typ} ret; - ret.v0 = {in0}; - ret.v1 = {in0}; - return ret;'''.format(**fmtspec) - elif typ == 'f16': - return '''nsimd_{simd_ext}_vf16 ret; - f32 f = nsimd_f16_to_f32({in0}); - ret.v0 = nsimd_set1_{simd_ext}_f32(f); - ret.v1 = nsimd_set1_{simd_ext}_f32(f); - return ret;'''.format(**fmtspec) - else: - nvar_in_vec = 128 // (int)(typ[1:]) - values = ', '.join(['{in0}'.format(**fmtspec) for i in range(0,nvar_in_vec)]) - return '''{vec} tmp = {{{val}}}; - return tmp;''' \ - .format(val=values, vec=ppc_vec_type(typ), **fmtspec) - -## Comparison operators: ==, <, <=, >, >= - -def cmp2(op, simd_ext, typ): - if typ[1:] == '64': - return emulate_64(op, simd_ext, ['l', 'v', 'v']) - elif typ == 'f16': - return emulate_16(op, simd_ext, 2, True) - else: - return 'return vec_cmp{op}({in0}, {in1});'. \ - format(op=op, **fmtspec) - -## Not equal - -def neq2(simd_ext, typ): - if typ[1:] == '64': - return emulate_64('ne', simd_ext, ['l', 'v', 'v']) - elif typ == 'f16': - return emulate_16('ne', simd_ext, 2, True) - else: - return '''return nsimd_notl_{simd_ext}_{typ}( - nsimd_eq_{simd_ext}_{typ}({in0}, {in1}));'''. 
\ - format(**fmtspec) - -## If_else - -def if_else3(simd_ext, typ): - if typ[1:] == '64': - return emulate_64('if_else1', simd_ext, ['v', 'l', 'v', 'v']) - elif typ == 'f16': - return emulate_16('if_else1', simd_ext, 3, False) - else: - return 'return vec_sel({in2}, {in1}, {in0});'.format(**fmtspec) - - -## Minimum and maximum - -def minmax2(op, simd_ext, typ): - if typ[1:] == '64': - return emulate_64(op, simd_ext, 3 * ['v']) - elif typ == 'f16': - return emulate_16(op, simd_ext, 2, False) - else: - return 'return vec_{op}({in0},{in1});'.format(op=op, **fmtspec) - - -## Abs - -def abs1(simd_ext, typ): - if typ == 'f16': - return emulate_16('abs', simd_ext, 1, False) - elif typ[0] == 'u': - return 'return {in0};'.format(**fmtspec) - elif typ[1:] == '64': - return emulate_64('abs', simd_ext, 2 * ['v']) - else: - return 'return vec_abs({in0});'.format(**fmtspec) - -## Round, trunc and ceil - -def round1(op, simd_ext, typ): - ppcop = {'round': 'round', 'trunc': 'trunc', 'ceil': 'ceil', - 'floor':'floor'} - if typ[0] == 'i' or typ[0] == 'u': - return 'return {in0};'.format(**fmtspec) - elif typ == 'f16': - return emulate_16(op, simd_ext, 1, False) - elif typ == 'f32': - return 'return vec_{op}({in0});'.format(op=ppcop[op], **fmtspec) - elif typ == 'f64': - return emulate_64(op, simd_ext, 2 * ['v']) - else: - raise ValueError('Unknown round: "{}" for type : "{}"'. \ - format(op, typ)) - -# Round to even -def round_to_even1(simd_ext, typ) : - if typ[0] == 'i' or typ[0] == 'u': - return 'return {in0};'.format(**fmtspec) - elif typ == 'f16': - return emulate_16('round_to_even', simd_ext, 1, False) - elif typ == 'f32': - return \ - '''nsimd_{simd_ext}_v{typ} fl = vec_floor({in0}); - nsimd_{simd_ext}_v{typ} ce = vec_ceil({in0}); - - nsimd_{simd_ext}_v{typ} half = {{0.5f, 0.5f, 0.5f, 0.5f}}; - nsimd_{simd_ext}_v{typ} fl_p_half = fl + half; - nsimd_{simd_ext}_v{typ} flo2 = fl * half; - - nsimd_{simd_ext}_vl{typ} test1 = vec_cmpeq(a0, fl_p_half); - nsimd_{simd_ext}_vl{typ} test2 = vec_cmpeq(vec_floor(flo2), flo2); - nsimd_{simd_ext}_vl{typ} test3 = vec_cmple(a0, fl_p_half); - - nsimd_{simd_ext}_vl{typ} test4 = - vec_or(vec_and(vec_nor(test1, test1), test3), - vec_and(test1, test2)); - - return vec_sel(ce, fl, test4); - '''.format(**fmtspec) - elif typ == 'f64': - return emulate_64('round_to_even', simd_ext, 2 * ['v']) - - -## FMA -def fma(simd_ext, typ): - if typ == 'f32': - return 'return vec_madd({in0}, {in1}, {in2});'.format(**fmtspec) - elif typ == 'f16': - return emulate_16('fma', simd_ext, 3, False) - elif typ[1:] == '64': - return emulate_64('fma', simd_ext, 4 * ['v']) - else: - return 'return {in0}*{in1}+{in2};'.format(**fmtspec) - -## FNMA -def fnma(simd_ext, typ): - if typ == 'f32': - return 'return vec_nmsub({in0}, {in1}, {in2});'.format(**fmtspec) - elif typ == 'f16': - return emulate_16('fnma', simd_ext, 3, False) - elif typ[1:] == '64': - return emulate_64('fnma', simd_ext, 4 * ['v']) - else: - return 'return -{in0}*{in1}+{in2};'.format(**fmtspec) - -## FMS -def fms(op, simd_ext, typ): - if typ == 'f32': - return 'return vec_madd({in0}, {in1}, -{in2});'.format(**fmtspec) - elif typ == 'f16': - return emulate_16('fms', simd_ext, 3, False) - elif typ[1:] == '64': - return emulate_64('fms', simd_ext, 4 * ['v']) - else: - return 'return {in0}*{in1}-{in2};'.format(**fmtspec) - -## FNMS -def fnms(op, simd_ext, typ): - if typ == 'f32': - return 'return vec_nmsub({in0}, {in1}, -{in2});'.format(**fmtspec) - elif typ == 'f16': - return emulate_16('fnms', simd_ext, 3, False) - elif typ[1:] == '64': 
- return emulate_64('fnms', simd_ext, 4 * ['v']) - else: - return 'return -{in0}*{in1}-{in2};'.format(**fmtspec) - -## Neg - -def neg1(simd_ext, typ): - if typ[1] == 'u': - return ''' - return nsimd_reinterpret_{simd_ext}_i{nbits}_u{nbits}( - nsimd_neg_{simd_ext}_i{nbits}( - nsimd_reinterpret_{simd_ext}_u{nbits}_i{nbits}({in0}))); -e '''.format(nbits=typ[1:], **fmtspec) - - elif typ[1:] == '64': - return emulate_64('neg', simd_ext, 2 * ['v']) - elif typ == 'f16': - return emulate_16('neg', simd_ext, 1, False) - else: - return 'return -{in0};'.format(**fmtspec) - - -## Reciprocals -def recs1(op, simd_ext, typ): - if typ == 'f16': - return emulate_16(op, simd_ext, 1, False) - elif typ[1:] == '64': - return emulate_64(op, simd_ext, 2 * ['v']) - elif op == 'rec11': - return 'return vec_re({in0});'.format(**fmtspec) - elif op == 'rec': - return 'return nsimd_set1_{simd_ext}_f32(1.f)/{in0};'. \ - format(vec_type=ppc_vec_type(typ), **fmtspec) - elif op == 'rsqrt11': - return 'return vec_rsqrte({in0});'.format(**fmtspec) - -## Load of logicals -def loadl(aligned, simd_ext, typ): - return \ - '''/* This can surely be improved but it is not our priority. */ - return nsimd_notl_{simd_ext}_{typ}(nsimd_eq_{simd_ext}_{typ}( - nsimd_load{align}_{simd_ext}_{typ}( - {in0}), nsimd_set1_{simd_ext}_{typ}({zero})));'''. \ - format(align='a' if aligned else 'u', - zero = 'nsimd_f32_to_f16(0.0f)' if typ == 'f16' - else '({})0'.format(typ), **fmtspec) - -## Store of logicals - -def storel(aligned, simd_ext, typ): - return \ - '''/* This can surely be improved but it is not our priority. */ - nsimd_store{align}_{simd_ext}_{typ}({in0}, - nsimd_if_else1_{simd_ext}_{typ}({in1}, - nsimd_set1_{simd_ext}_{typ}({one}), - nsimd_set1_{simd_ext}_{typ}({zero})));'''. \ - format(align = 'a' if aligned else 'u', - one = 'nsimd_f32_to_f16(1.0f)' if typ == 'f16' - else '({})1'.format(typ), - zero = 'nsimd_f32_to_f16(0.0f)' if typ == 'f16' - else '({})0'.format(typ), **fmtspec) - -## All and any -def allany1(op, simd_ext, typ): - binop = '&&' if op == 'all' else '||' - - if typ == 'f16': - return \ - '''return nsimd_{op}_{simd_ext}_f32({in0}.v0) {binop} - nsimd_{op}_{simd_ext}_f32({in0}.v1);'''. \ - format(op=op, binop=binop, **fmtspec) - elif typ[1:] == '64': - return 'return {in0}.v0 {binop} {in0}.v1;'. \ - format(binop=binop, **fmtspec) - else: - values = ', '.join(['0x0' for i in range(0,16)]) - return \ - '''nsimd_{simd_ext}_vu8 reg = {{{values}}}; - return vec_{op}_gt(({vec_type}){in0}, ({vec_type})reg);'''\ - .format(values=values, vec_type=ppc_vec_type('u'+typ[1:]), op=op, - **fmtspec) - -## nbtrue - -def nbtrue1(simd_ext, typ): - if typ == 'f16': - return \ - '''return nsimd_nbtrue_{simd_ext}_f32({in0}.v0) + - nsimd_nbtrue_{simd_ext}_f32({in0}.v1);'''. \ - format(**fmtspec) - elif typ[1:] == '64': - return 'return -(int)((i64)({in0}.v0) + (i64)({in0}.v1));'. \ - format(**fmtspec) - else: - return \ - '''nat i; - int ret = 0; - {typ} buf[{size}]; - nsimd_storelu_{simd_ext}_{typ}(buf, {in0}); - for (i=0; i<{size}; ++i) {{ - ret += (u8)buf[i] ? 
1:0; - }} - return ret;''' \ - .format(size=128//int(typ[1:]), **fmtspec) - -## Reinterpret logical - -def reinterpretl1(simd_ext, from_typ, to_typ): - if from_typ == to_typ: - return 'return {in0};'.format(**fmtspec) - elif from_typ[1:] == '64': - return \ - '''nsimd_{simd_ext}_vl{to_typ} ret; - memcpy(&ret.v0, &{in0}.v0, sizeof(ret.v0)); - memcpy(&ret.v1, &{in0}.v1, sizeof(ret.v1)); - return ret;'''.format(**fmtspec) - elif from_typ == 'f16': - return \ - '''{to_typ} buf[8]; - f32 buf_conv[4]; - - vec_st((__vector float){in0}.v0, 0, buf_conv); - buf[0] = ({to_typ})nsimd_f32_to_u16(buf_conv[0]); - buf[1] = ({to_typ})nsimd_f32_to_u16(buf_conv[1]); - buf[2] = ({to_typ})nsimd_f32_to_u16(buf_conv[2]); - buf[3] = ({to_typ})nsimd_f32_to_u16(buf_conv[3]); - - vec_st((__vector float){in0}.v1, 0, buf_conv); - buf[4] = ({to_typ})nsimd_f32_to_u16(buf_conv[0]); - buf[5] = ({to_typ})nsimd_f32_to_u16(buf_conv[1]); - buf[6] = ({to_typ})nsimd_f32_to_u16(buf_conv[2]); - buf[7] = ({to_typ})nsimd_f32_to_u16(buf_conv[3]); - - return ({ppc_to_typ})vec_ld(0, buf);'''.\ - format(ppc_to_typ=ppc_vec_typel(to_typ), **fmtspec) - elif to_typ == 'f16': - return \ - '''nsimd_{simd_ext}_vlf16 ret; - u16 buf_conv[8]; - f32 buf[4]; - - vec_st({in0}, 0, buf_conv); - - buf[0] = nsimd_u16_to_f32(buf_conv[0]); - buf[1] = nsimd_u16_to_f32(buf_conv[1]); - buf[2] = nsimd_u16_to_f32(buf_conv[2]); - buf[3] = nsimd_u16_to_f32(buf_conv[3]); - ret.v0 = (__vector __bool int) vec_ld(0, buf); - - buf[0] = nsimd_u16_to_f32(buf_conv[4]); - buf[1] = nsimd_u16_to_f32(buf_conv[5]); - buf[2] = nsimd_u16_to_f32(buf_conv[6]); - buf[3] = nsimd_u16_to_f32(buf_conv[7]); - ret.v1 = (__vector __bool int) vec_ld(0, buf); - - return ret;'''.format(**fmtspec) - else: - return 'return ({ppc_to_typ}) {in0};'. \ - format(ppc_to_typ=ppc_vec_typel(to_typ), **fmtspec) - -## Convert - -def convert1(simd_ext, from_typ, to_typ): - if from_typ == to_typ: - return 'return {in0};'.format(**fmtspec) - elif from_typ == 'f16': - if to_typ == 'u16': - return \ - '''return vec_packsu((__vector unsigned int)vec_ctu({in0}.v0, 0), - (__vector unsigned int)vec_ctu({in0}.v1, 0));'''.\ - format(**fmtspec) - elif to_typ == 'i16': - return \ - '''return vec_packs((__vector signed int)vec_cts({in0}.v0, 0), - (__vector signed int)vec_cts({in0}.v1, 0));'''.\ - format(**fmtspec) - - elif to_typ == 'f16': - if from_typ == 'u16': - return \ - '''nsimd_{simd_ext}_vf16 ret; - /* Unpack extends the sign, we need to remove the extra 1s */ - nsimd_power7_vi32 mask = {{0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}}; - - ret.v0 = vec_ctf(vec_and(vec_unpackh((__vector short)a0), mask), 0); - ret.v1 = vec_ctf(vec_and(vec_unpackl((__vector short)a0), mask), 0); - - return ret;'''.format(**fmtspec) - elif from_typ == 'i16': - return \ - '''nsimd_{simd_ext}_vf16 ret; - ret.v0=vec_ctf(vec_unpackh({in0}), 0); - ret.v1=vec_ctf(vec_unpackl({in0}), 0); - return ret;'''.format(**fmtspec) - elif from_typ[1:] == '64': - return \ - '''nsimd_{simd_ext}_v{to_typ} ret; - ret.v0 = ({to_typ})({in0}.v0); - ret.v1 = ({to_typ})({in0}.v1); - return ret;'''.format(**fmtspec) - elif from_typ == 'f32' and to_typ == 'i32': - return 'return vec_cts({in0}, 0);'.format(**fmtspec) - elif from_typ == 'f32' and to_typ == 'u32': - return 'return vec_ctu({in0}, 0);'.format(**fmtspec) - elif (from_typ == 'i32' or from_typ == 'u32') and to_typ == 'f32': - return 'return vec_ctf({in0}, 0);'.format(**fmtspec) - elif from_typ in common.iutypes and to_typ in common.iutypes: - return 'return ({cast}) {in0};'. 
\ - format(cast=ppc_vec_type(to_typ), **fmtspec) - else: - raise ValueError('Unknown conversion: "{}" to "{}"'. \ - format(from_typ, to_typ)) - -## Reinterpret - -def reinterpret1(simd_ext, from_typ, to_typ): - if from_typ == to_typ: - return 'return {in0};'.format(**fmtspec) - elif from_typ[1:] == '64': - return ''' - nsimd_{simd_ext}_v{to_typ} ret; - memcpy(&ret.v0, &{in0}.v0, sizeof(ret.v0)); - memcpy(&ret.v1, &{in0}.v1, sizeof(ret.v1)); - return ret; - '''.format(**fmtspec) - elif from_typ == 'f16': - return '''{to_typ} buf[8]; - f32 buf_conv[4]; - - vec_st({in0}.v0, 0, buf_conv); - buf[0] = ({to_typ})nsimd_f32_to_u16(buf_conv[0]); - buf[1] = ({to_typ})nsimd_f32_to_u16(buf_conv[1]); - buf[2] = ({to_typ})nsimd_f32_to_u16(buf_conv[2]); - buf[3] = ({to_typ})nsimd_f32_to_u16(buf_conv[3]); - - vec_st({in0}.v1, 0, buf_conv); - buf[4] = ({to_typ})nsimd_f32_to_u16(buf_conv[0]); - buf[5] = ({to_typ})nsimd_f32_to_u16(buf_conv[1]); - buf[6] = ({to_typ})nsimd_f32_to_u16(buf_conv[2]); - buf[7] = ({to_typ})nsimd_f32_to_u16(buf_conv[3]); - - return vec_ld(0, buf);'''.format(**fmtspec) - elif to_typ == 'f16': - return '''nsimd_{simd_ext}_vf16 ret; - u16 buf_conv[8]; - f32 buf[4]; - - vec_st({in0}, 0, ({from_typ}*) buf_conv); - - buf[0] = nsimd_u16_to_f32(buf_conv[0]); - buf[1] = nsimd_u16_to_f32(buf_conv[1]); - buf[2] = nsimd_u16_to_f32(buf_conv[2]); - buf[3] = nsimd_u16_to_f32(buf_conv[3]); - ret.v0 = vec_ld(0, buf); - - buf[0] = nsimd_u16_to_f32(buf_conv[4]); - buf[1] = nsimd_u16_to_f32(buf_conv[5]); - buf[2] = nsimd_u16_to_f32(buf_conv[6]); - buf[3] = nsimd_u16_to_f32(buf_conv[7]); - ret.v1 = vec_ld(0, buf); - return ret;'''.format(**fmtspec) - else: - return 'return ({typ_ppc}) {in0};'. \ - format(typ_ppc=ppc_vec_type(to_typ), **fmtspec) - -## reverse - -def reverse1(simd_ext, typ): - if typ == 'f16': - return '''nsimd_{simd_ext}_vf16 ret; - ret.v0 = nsimd_reverse_{simd_ext}_f32({in0}.v1); - ret.v1 = nsimd_reverse_{simd_ext}_f32({in0}.v0); - return ret;'''.format(**fmtspec) - elif typ[1:] == '8': - return '''__vector unsigned char perm = - {{0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, - 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00}}; - return vec_perm({in0}, perm, perm);'''.format (**fmtspec) - elif typ[1:] == '16': - return ''' __vector unsigned char perm = - {{0x0E, 0x0F, 0x0C, 0x0D, 0x0A, 0x0B, 0x08, 0x09, - 0x06, 0x07, 0x04, 0x05, 0x02, 0x03, 0x00, 0x01}}; - return vec_perm({in0}, perm, perm);'''.format (**fmtspec) - elif typ[1:] == '32': - return ''' __vector unsigned char perm = - {{0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B, - 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03}}; - return vec_perm({in0}, perm, perm);'''.format (**fmtspec) - elif typ[1:] == '64': - return '''nsimd_{simd_ext}_v{typ} ret; - ret.v0 = {in0}.v1; - ret.v1 = {in0}.v0; - return ret;'''.format (**fmtspec) - -## Horizontal sum - -def addv(simd_ext, typ): - if typ == 'f16': - return '''return nsimd_f32_to_f16(nsimd_addv_{simd_ext}_f32({in0}.v0) - + nsimd_addv_{simd_ext}_f32({in0}.v1));'''. 
\ - format(**fmtspec) - elif typ[1:] == '64': - return 'return {in0}.v0 + {in0}.v1;'.format(**fmtspec) - else: - return \ - '''nat i; - {typ} ret = ({typ}) 0; - {typ} buf[{size}]; - vec_st({in0}, 0, buf); - for (i=0; i<{size}; ++i) {{ - ret += buf[i]; - }} - return ret;''' \ - .format(size=128//int(typ[1:]), **fmtspec) - -# ----------------------------------------------------------------------------- -# Up convert - -def upcvt1(simd_ext, from_typ, to_typ): - if from_typ == 'f16' and to_typ == 'f32': - return \ - '''nsimd_{simd_ext}_v{to_typ}x2 ret; - ret.v0 = {in0}.v0; - ret.v1 = {in0}.v1; - return ret;'''.format(**fmtspec) - - elif from_typ == 'f16' and to_typ[1:] == '32': - sign='u' if to_typ[0]=='u' else 's' - return \ - '''nsimd_{simd_ext}_v{to_typ}x2 ret; - ret.v0 = vec_ct{sign}({in0}.v0, 0); - ret.v1 = vec_ct{sign}({in0}.v1, 0); - return ret;'''.format(sign=sign, **fmtspec) - - elif from_typ[1:] == '8' and to_typ == 'f16': - return \ - '''nsimd_{simd_ext}_vf16x2 ret; - nsimd_{simd_ext}_vi16x2 tmp; - tmp = nsimd_upcvt_{simd_ext}_i16_{sign}8(a0); - ret.v0 = nsimd_cvt_{simd_ext}_f16_i16(tmp.v0); - ret.v1 = nsimd_cvt_{simd_ext}_f16_i16(tmp.v1); - return ret;'''.format(sign=from_typ[0], **fmtspec) - - elif from_typ[1:] == '32' and to_typ[1:] == '64': - return \ - '''nsimd_{simd_ext}_v{to_typ}x2 ret; - {from_typ} buf[4]; - vec_st({in0}, 0, buf); - ret.v0.v0 = ({to_typ})buf[0]; - ret.v0.v1 = ({to_typ})buf[1]; - ret.v1.v0 = ({to_typ})buf[2]; - ret.v1.v1 = ({to_typ})buf[3]; - return ret;'''.format(**fmtspec) - elif from_typ[0] == 'u' and to_typ[0] != 'f': - mask = 'nsimd_{simd_ext}_v{sign}32 mask = {{0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}};' \ - if from_typ=='u16' else \ - '''nsimd_{simd_ext}_v{sign}16 mask = {{0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF}};''' - mask = mask.format(sign=to_typ[0], **fmtspec) - - return \ - '''nsimd_{simd_ext}_v{to_typ}x2 ret; - ret.v0 = ({ppc_typ}) (vec_unpackh(({signed_ppc_type}){in0})); - ret.v1 = ({ppc_typ}) (vec_unpackl(({signed_ppc_type}){in0})); - - /* Unpack extends the sign, we need to remove the extra 1s */ - {mask} - ret.v0 = vec_and(ret.v0, mask); - ret.v1 = vec_and(ret.v1, mask); - - return ret;'''. \ - format(ppc_typ=ppc_vec_type(to_typ), - signed_ppc_type=ppc_vec_type('i'+from_typ[1:]), - mask=mask, - **fmtspec) - elif from_typ[0] == 'u' and to_typ == 'f32': - return \ - '''nsimd_{simd_ext}_vf32x2 ret; - nsimd_{simd_ext}_vi32x2 tmp; - - tmp.v0 = (vec_unpackh(({signed_ppc_typ}){in0})); - tmp.v1 = (vec_unpackl(({signed_ppc_typ}){in0})); - - /* Unpack extends the sign, we need to remove the extra 1s */ - nsimd_{simd_ext}_vi32 mask = {{0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}}; - ret.v0 = vec_ctf(vec_and(tmp.v0, mask), 0); - ret.v1 = vec_ctf(vec_and(tmp.v1, mask), 0); - - return ret;'''. \ - format(ppc_typ=ppc_vec_type(to_typ), - signed_ppc_typ=ppc_vec_type('i'+from_typ[1:]), - **fmtspec) - elif from_typ == 'i16' and to_typ == 'f32': - return \ - '''nsimd_{simd_ext}_v{to_typ}x2 ret; - ret.v0 = vec_ctf(vec_unpackh({in0}), 0); - ret.v1 = vec_ctf(vec_unpackl({in0}), 0); - return ret;'''. \ - format(ppc_typ=ppc_vec_type(to_typ), **fmtspec) - else: - return \ - '''nsimd_{simd_ext}_v{to_typ}x2 ret; - ret.v0 = ({ppc_typ}) (vec_unpackh({in0})); - ret.v1 = ({ppc_typ}) (vec_unpackl({in0})); - return ret;'''. 
\ - format(ppc_typ=ppc_vec_type(to_typ), **fmtspec) - -# ----------------------------------------------------------------------------- -# Down convert - -def downcvt1(simd_ext, from_typ, to_typ): - if from_typ[1:] == '64' and to_typ[1:]== '32': - return \ - '''{to_typ} buf[4]; - buf[0] = ({to_typ}){in0}.v0; - buf[1] = ({to_typ}){in0}.v1; - buf[2] = ({to_typ}){in1}.v0; - buf[3] = ({to_typ}){in1}.v1; - return vec_ld(0, buf);'''.format(**fmtspec) - - elif from_typ == 'f16' and to_typ[1:] == '8': - return \ - '''return nsimd_downcvt_{simd_ext}_{sign}8_{sign}16( - nsimd_cvt_{simd_ext}_{sign}16_f16(a0), - nsimd_cvt_{simd_ext}_{sign}16_f16(a1));'''\ - .format(sign=to_typ[0], **fmtspec) - - elif from_typ == 'f32' and to_typ == 'f16': - return \ - '''nsimd_{simd_ext}_vf16 ret; - ret.v0 = {in0}; - ret.v1 = {in1}; - return ret;'''.format(**fmtspec) - - elif from_typ[1:] == '32' and to_typ == 'f16': - return \ - '''nsimd_{simd_ext}_vf16 ret; - ret.v0 = vec_ctf({in0}, 0); - ret.v1 = vec_ctf({in1}, 0); - return ret;'''.format(**fmtspec) - - elif from_typ == 'f32' and (to_typ[0] == 'u' or to_typ[0] == 'i'): - conv='(__vector unsigned int)vec_ctu' if to_typ[0]=='u' \ - else '(__vector signed int) vec_cts' - - return \ - '''return ({ppc_typ})vec_pack({conv}({in0}, 0), {conv}({in1}, 0));'''.\ - format(ppc_typ=ppc_vec_type(to_typ), conv=conv, **fmtspec) - - else: - return \ - '''return ({ppc_typ})vec_pack({in0}, {in1});'''. \ - format(ppc_typ=ppc_vec_type(to_typ), **fmtspec) - - -# # ----------------------------------------------------------------------------- -# ## zip functions - -# def zip(func, simd_ext, typ): -# if typ[1:] == '64': -# return '''nsimd_{simd_ext}_v{typ} ret; -# ret.v0 = {in0}.v{i}; -# ret.v1 = {in1}.v{i}; -# return ret;'''. \ -# format(**fmtspec, -# i= '0' if func in ['zip1', 'uzp1'] else '1') -# else : -# return ''' -# return vec_vpkudum(vec_vupk{suff}({in0}), vec_vupk{suff}({in1}));'''. \ -# format(**fmtspec, -# func=func, -# suff= 'lsw' if func == 'ziplo' else 'hsw') - -# # ----------------------------------------------------------------------------- -# ## unzip functions - -# def unzip(func, simd_ext, typ): -# if typ[1:] == '64': -# return '''nsimd_{simd_ext}_v{typ} ret; -# ret.v0 = {in0}.v{i}; -# ret.v1 = {in1}.v{i}; -# return ret;'''. \ -# format(**fmtspec, -# i= '0' if func in ['zip1', 'uzp1'] else '1') -# else : -# return '''{simd_typ} aps = {in0}; -# {simd_typ} bps = {in1}; -# {simd_typ} tmp; -# int j = 0; -# int step = (int)log2({nb_reg}/sizeof({typ})); -# while (j < step) {{ -# tmp = nsimd_ziplo_{simd_ext}_{typ}(aps, bps); -# bps = nsimd_ziphi_{simd_ext}_{typ}(aps, bps); -# aps = tmp; -# j++; -# }} -# return {ret};'''. 
\ -# format(**fmtspec, -# simd_typ=get_type(simd_ext, typ), -# nb_reg=get_nb_registers(simd_ext), -# ret='aps' if func == 'unziplo' else 'bps') - - -## get_impl function - -def get_impl(func, simd_ext, from_typ, to_typ): - global fmtspec - - fmtspec = { - 'simd_ext': simd_ext, - 'typ': from_typ, - 'styp': get_type(simd_ext, from_typ), - 'from_typ': from_typ, - 'to_typ': to_typ, - 'in0': common.in0, - 'in1': common.in1, - 'in2': common.in2, - 'in3': common.in3, - 'in4': common.in4, - 'in5': common.in5, - 'typnbits': from_typ[1:] - } - - impls = { - 'loada': 'load1234(simd_ext, from_typ, 1, True)', - 'load2a': 'load1234(simd_ext, from_typ, 2, True)', - 'load3a': 'load1234(simd_ext, from_typ, 3, True)', - 'load4a': 'load1234(simd_ext, from_typ, 4, True)', - 'loadu': 'load1234(simd_ext, from_typ, 1, False)', - 'load2u': 'load1234(simd_ext, from_typ, 2, False)', - 'load3u': 'load1234(simd_ext, from_typ, 3, False)', - 'load4u': 'load1234(simd_ext, from_typ, 4, False)', - 'storea': 'store1234(simd_ext, from_typ, 1, True)', - 'store2a': 'store1234(simd_ext, from_typ, 2, True)', - 'store3a': 'store1234(simd_ext, from_typ, 3, True)', - 'store4a': 'store1234(simd_ext, from_typ, 4, True)', - 'storeu': 'store1234(simd_ext, from_typ, 1, False)', - 'store2u': 'store1234(simd_ext, from_typ, 2, False)', - 'store3u': 'store1234(simd_ext, from_typ, 3, False)', - 'store4u': 'store1234(simd_ext, from_typ, 4, False)', - 'andb': 'bop2("andb", simd_ext, from_typ)', - 'xorb': 'bop2("xorb", simd_ext, from_typ)', - 'orb': 'bop2("orb", simd_ext, from_typ)', - 'andl': 'lop2("andl", simd_ext, from_typ)', - 'xorl': 'lop2("xorl", simd_ext, from_typ)', - 'orl': 'lop2("orl", simd_ext, from_typ)', - 'notb': 'notb1(simd_ext, from_typ)', - 'notl': 'lnot1(simd_ext, from_typ)', - 'andnotb': 'bop2("andnotb", simd_ext, from_typ)', - 'andnotl': 'lop2("andnotl", simd_ext, from_typ)', - 'add': 'simple_op2("add", simd_ext, from_typ)', - 'sub': 'simple_op2("sub", simd_ext, from_typ)', - 'div': 'simple_op2("div", simd_ext, from_typ)', - 'sqrt': 'sqrt1(simd_ext, from_typ)', - 'len': 'len1(simd_ext, from_typ)', - 'mul': 'simple_op2("mul", simd_ext, from_typ)', - 'shl': 'shl_shr("shl", simd_ext, from_typ)', - 'shr': 'shl_shr("shr", simd_ext, from_typ)', - 'set1': 'set1(simd_ext, from_typ)', - 'eq': 'cmp2("eq", simd_ext, from_typ)', - 'lt': 'cmp2("lt", simd_ext, from_typ)', - 'le': 'cmp2("le", simd_ext, from_typ)', - 'gt': 'cmp2("gt", simd_ext, from_typ)', - 'ge': 'cmp2("ge", simd_ext, from_typ)', - 'ne': 'neq2(simd_ext, from_typ)', - 'if_else1': 'if_else3(simd_ext, from_typ)', - 'min': 'minmax2("min", simd_ext, from_typ)', - 'max': 'minmax2("max", simd_ext, from_typ)', - 'loadla': 'loadl(True, simd_ext, from_typ)', - 'loadlu': 'loadl(False, simd_ext, from_typ)', - 'storela': 'storel(True, simd_ext, from_typ)', - 'storelu': 'storel(False, simd_ext, from_typ)', - 'abs': 'abs1(simd_ext, from_typ)', - 'fma': 'fma(simd_ext, from_typ)', - 'fnma': 'fnma(simd_ext, from_typ)', - 'fms': 'fms("fms", simd_ext, from_typ)', - 'fnms': 'fnms("fnms", simd_ext, from_typ)', - 'ceil': 'round1("ceil", simd_ext, from_typ)', - 'floor': 'round1("floor", simd_ext, from_typ)', - 'trunc': 'round1("trunc", simd_ext, from_typ)', - 'round_to_even': 'round_to_even1(simd_ext, from_typ)', - 'all': 'allany1("all", simd_ext, from_typ)', - 'any': 'allany1("any", simd_ext, from_typ)', - 'reinterpret': 'reinterpret1(simd_ext, from_typ, to_typ)', - 'reinterpretl': 'reinterpretl1(simd_ext, from_typ, to_typ)', - 'cvt': 'convert1(simd_ext, from_typ, to_typ)', - 'rec11': 
'recs1("rec11", simd_ext, from_typ)', - 'rsqrt11': 'recs1("rsqrt11", simd_ext, from_typ)', - 'rec': 'recs1("rec", simd_ext, from_typ)', - 'neg': 'neg1(simd_ext, from_typ)', - 'nbtrue': 'nbtrue1(simd_ext, from_typ)', - 'reverse': 'reverse1(simd_ext, from_typ)', - 'addv': 'addv(simd_ext, from_typ)', - 'upcvt': 'upcvt1(simd_ext, from_typ, to_typ)', - 'downcvt': 'downcvt1(simd_ext, from_typ, to_typ)', - 'ziplo': 'zip("ziplo", simd_ext, from_typ)', - 'ziphi': 'zip("ziphi", simd_ext, from_typ)', - 'unziplo': 'unzip("unziplo", simd_ext, from_typ)', - 'unziphi': 'unzip("unziphi", simd_ext, from_typ)' - } - if simd_ext not in get_simd_exts(): - raise ValueError('Unknown SIMD extension "{}"'.format(simd_ext)) - if not from_typ in common.types: - raise ValueError('Unknown type "{}"'.format(from_typ)) - if not func in impls: - return common.NOT_IMPLEMENTED - else: - return eval(impls[func]) diff --git a/egg/experiments/gen_sleef_operators.py b/egg/experiments/gen_sleef_operators.py new file mode 100644 index 00000000..e5855ce3 --- /dev/null +++ b/egg/experiments/gen_sleef_operators.py @@ -0,0 +1,117 @@ +# Copyright (c) 2021 Agenium Scale +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+
+import os
+
+script_dir = os.path.dirname(os.path.realpath(__file__))
+sleef_dir = os.path.join(script_dir, '..', '..', '_deps-sleef')
+sleef_version = '3.5.1'
+
+funcproto = os.path.join(sleef_dir, 'sleef-{}'.format(sleef_version),
+                         'src', 'libm', 'funcproto.h')
+
+ulp_suffix = {
+    '0' : '',
+    '1' : '_u1',
+    '2' : '_u05',
+    '3' : '_u35',
+    '4' : '_u15',
+    '5' : '_u3500'
+}
+
+func_type = {
+    '0' : 'v {} v',
+    '1' : 'v {} v v',
+    '2' : 'vx2 {} v',
+    '3' : 'v {} v p',
+    '4' : 'v {} v',
+    '5' : 'v {} v v v',
+    '6' : 'vx2 {} v',
+    '7' : 'p {} p',
+    '8' : '* {} p'
+}
+
+props = {
+    'cos' : ['cosine', 'DocTrigo', 'R'],
+    'sin' : ['sine', 'DocTrigo', 'R'],
+    'fastcos' : ['cosine', 'DocTrigo', 'R'],
+    'fastsin' : ['sine', 'DocTrigo', 'R'],
+    'cospi' : ['cosine of multiple of pi argument', 'DocTrigo', 'R'],
+    'sinpi' : ['sine of multiple of pi argument', 'DocTrigo', 'R'],
+    'tan' : ['tangent', 'DocTrigo', 'R\{(z+0.5)*pi}'],
+    'acos' : ['arc cosine', 'DocTrigo', '(-1,1)'],
+    'asin' : ['arc sine', 'DocTrigo', '(-1,1)'],
+    'atan' : ['arc tangent', 'DocTrigo', 'R'],
+    'atan2' : ['arc tangent', 'DocTrigo', 'RxR'],
+
+    'log' : ['natural logarithm', 'DocExpLog', '(0,Inf)'],
+    'log2' : ['base-2 logarithm', 'DocExpLog', '(0,Inf)'],
+    'log10' : ['base-10 logarithm', 'DocExpLog', '(0,Inf)'],
+    'log1p' : ['logarithm of one plus argument', 'DocExpLog', '(-1,Inf)'],
+    'exp' : ['exponential', 'DocExpLog', 'R'],
+    'exp2' : ['base-2 exponential', 'DocExpLog', 'R'],
+    'exp10' : ['base-10 exponential', 'DocExpLog', 'R'],
+    'expm1' : ['exponential minus 1', 'DocExpLog', 'R'],
+    'pow' : ['power', 'DocExpLog', 'RxR'],
+    'fastpow' : ['power', 'DocExpLog', 'RxR'],
+
+    'cbrt' : ['cube root', 'DocBasicArithmetic', 'R'],
+    'hypot' : ['hypotenuse', 'DocBasicArithmetic', 'RxR'],
+
+    'sinh': ['hyperbolic sine', 'DocHyper', 'R'],
+    'cosh': ['hyperbolic cosine', 'DocHyper', 'R'],
+    'tanh': ['hyperbolic tangent', 'DocHyper', 'R'],
+    'asinh': ['hyperbolic arc sine', 'DocHyper', 'R'],
+    'acosh': ['hyperbolic arc cosine', 'DocHyper', '(1,Inf)'],
+    'atanh': ['hyperbolic arc tangent', 'DocHyper', '(-1,1)'],
+
+    'lgamma' : ['log gamma', 'DocMisc', 'R\{-n}'],
+    'tgamma' : ['gamma', 'DocMisc', 'R\{-n}'],
+    'erf' : ['error function', 'DocMisc', 'R'],
+    'erfc' : ['complementary error function', 'DocMisc', 'R']
+}
+
+with open(funcproto, 'r') as fin:
+    for line in fin:
+        if not (line.find('{') != -1 and line.find('}') != -1):
+            continue
+        items = [item.strip() for item in line.strip(' \n\r{},').split(',')]
+        items[0] = items[0].strip('"')
+        if items[0] == 'NULL':
+            break
+        if items[0] not in props:
+            continue
+        name = items[0] + '_u' + items[1]
+        symbol = 'nsimd_sleef_{}'.format(name)
+        prop = props[items[0]]
+        print('class {}{}(SrcOperator):'. \
+              format(name[0].upper(), name[1:]))
+        print('    full_name = \'{}\''.format(prop[0]))
+        print('    signature = \'{}\''.format(func_type[items[3]]) \
+              .format(name))
+        print('    sleef_symbol_prefix = \'{}\''.format(symbol))
+        print('    domain = Domain(\'{}\')'.format(prop[2]))
+        print('    categories = [{}]'.format(prop[1]))
+        print('    desc = \'Compute the {} of its argument{} with ' \
+              'a precision of {} ulps. For more information visit ' \
+              '.\''.format(prop[0],
+                           's' if items[3] in ['1', '3', '5'] else '',
+                           float(items[1]) / 10.0))
+        print('')
diff --git a/egg/experiments/round-ppc.c b/egg/experiments/round-ppc.c
new file mode 100644
index 00000000..c42bdece
--- /dev/null
+++ b/egg/experiments/round-ppc.c
@@ -0,0 +1,37 @@
+#include <altivec.h>
+#include <stdio.h>
+
+void pp(const char *prefix, FILE *out, float buf[4]) {
+  fputs(prefix, out);
+  fputc('{', out);
+  for (int i = 0; i < 4; i++) {
+    fprintf(out, " %f", (double)buf[i]);
+  }
+  fputs(" }\n", out);
+}
+
+int main() {
+  float res[4];
+
+  float buf[4];
+  buf[0] = -1.5f;
+  buf[1] = -0.5f;
+  buf[2] = 0.5f;
+  buf[3] = 1.5f;
+  __vector float v = *(__vector float *)buf;
+
+  pp("   buf = ", stdout, buf);
+
+  *(__vector float *)res = vec_round(v);
+  pp(" round = ", stdout, res);
+
+  *(__vector float *)res = vec_rint(v);
+  pp("  rint = ", stdout, res);
+
+  *(__vector float *)res = vec_roundc(v);
+  pp("roundc = ", stdout, res);
+
+  return 0;
+}
diff --git a/egg/gen_adv_c_api.py b/egg/gen_adv_c_api.py
new file mode 100644
index 00000000..a9eeaedc
--- /dev/null
+++ b/egg/gen_adv_c_api.py
@@ -0,0 +1,324 @@
+# Copyright (c) 2021 Agenium Scale
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
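Before the generator itself, a minimal sketch (C11) of the advanced API it produces. It assumes the generated header `c_adv_api_functions.h` is reached through the usual `<nsimd/nsimd.h>` umbrella include and that the compiler supports C11 `_Generic`; the helper name `add_f32` is ours:

```c
#include <nsimd/nsimd.h>

/* Adds two f32 buffers using the pack types and _Generic overloads
   generated by the script below. */
void add_f32(f32 *dst, const f32 *x, const f32 *y, int n) {
  int i, step = nsimd_len(nsimd_pack_f32); /* type-directed macro */
  for (i = 0; i + step <= n; i += step) {
    nsimd_pack_f32 vx = nsimd_loadu(nsimd_pack_f32, &x[i]);
    nsimd_pack_f32 vy = nsimd_loadu(nsimd_pack_f32, &y[i]);
    /* nsimd_add dispatches through _Generic on the pack type of vx */
    nsimd_storeu(&dst[i], nsimd_add(vx, vy));
  }
}
```

Loads take the destination pack type as an explicit first argument because a bare pointer cannot discriminate the overload; closed operators such as `nsimd_add` dispatch on their first pack argument, as the `_Generic` tables generated below show.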
+ +import common +import os +import operators + +# ----------------------------------------------------------------------------- +# Construct C11 types + +def get_c11_types(simd_ext): + ret = '' + for se in common.simds_deps[simd_ext]: + ret += '\n\n'.join([ + '''typedef NSIMD_STRUCT nsimd_pack_{typ}_{se} {{ + nsimd_{se}_v{typ} v; + }} nsimd_pack_{typ}_{se}; + + NSIMD_INLINE nsimd_pack_{typ}_{se} + nsimd_make_pack_{typ}_{se}(nsimd_{se}_v{typ} v) {{ + return (nsimd_pack_{typ}_{se}){{ v }}; + }}'''.format(typ=typ, se=se) for typ in common.types]) + ret += '\n\n' + ret += '\n\n'.join([ + '''typedef NSIMD_STRUCT nsimd_packl_{typ}_{se} {{ + nsimd_{se}_vl{typ} v; + }} nsimd_packl_{typ}_{se}; + + NSIMD_INLINE nsimd_packl_{typ}_{se} + nsimd_make_packl_{typ}_{se}(nsimd_{se}_vl{typ} v) {{ + return (nsimd_packl_{typ}_{se}){{ v }}; + }}'''.format(typ=typ, se=se) for typ in common.types]) + for deg in [2, 3, 4]: + vs = ', '.join(['v{}'.format(i) for i in range(deg)]) + avs = ', '.join(['{{a0.v{}}}'.format(i) for i in range(deg)]) + ret += '\n\n' + ret += '\n\n'.join([ + '''typedef NSIMD_STRUCT nsimd_packx{deg}_{typ}_{se} {{ + nsimd_pack_{typ}_{se} {vs}; + }} nsimd_packx{deg}_{typ}_{se}; + + NSIMD_INLINE nsimd_packx{deg}_{typ}_{se} + nsimd_make_packx{deg}_{typ}_{se} + (nsimd_{se}_v{typ}x{deg} a0) {{ + return (nsimd_packx{deg}_{typ}_{se}){{ {avs} }}; + }} '''. \ + format(typ=typ, se=se, vs=vs, deg=deg, avs=avs) \ + for typ in common.types]) + + ret += '\n\n' + ret += '#define nsimd_make_pack(var, func) ' \ + '_Generic(var, \\\n' + ret += '\n'.join([ + 'nsimd_pack_{typ}_{se}: nsimd_make_pack_{typ}_{se}, \\'. \ + format(typ=typ, se=se) for typ in common.types \ + for se in common.simds_deps[simd_ext]]) + ret += '\n' + ret += '\n'.join([ + 'nsimd_packl_{typ}_{se}: nsimd_make_packl_{typ}_{se}, \\'. \ + format(typ=typ, se=se) for typ in common.types \ + for se in common.simds_deps[simd_ext]]) + ret += '\n' + ret += '\n'.join([ + 'nsimd_packx{d}_{typ}_{se}: nsimd_make_packx{d}_{typ}_{se}, \\'. \ + format(typ=typ, se=se, d=d) for typ in common.types \ + for d in [2, 3, 4] \ + for se in common.simds_deps[simd_ext]]) + ret += '\ndefault: nsimd_c11_type_unsupported)(func)' + + ret += '\n\n' + ret += '\n'.join([ + 'typedef nsimd_pack_{typ}_{simd_ext} nsimd_pack_{typ};'. \ + format(typ=typ, simd_ext=simd_ext) for typ in common.types]) + ret += '\n\n' + ret += '\n'.join([ + 'typedef nsimd_packl_{typ}_{simd_ext} nsimd_packl_{typ};'. \ + format(typ=typ, simd_ext=simd_ext) for typ in common.types]) + ret += '\n\n' + ret += '\n'.join([ + 'typedef nsimd_packx{d}_{typ}_{simd_ext} nsimd_packx{d}_{typ};'. \ + format(typ=typ, simd_ext=simd_ext, d=d) \ + for typ in common.types for d in [2, 3, 4]]) + + ret += '\n\n' + ret += '#define nsimd_c11_pack(var) _Generic((var), \\\n' + ret += '\n'.join([ + 'nsimd_packl_{typ}_{se}: ' \ + '((nsimd_pack_{typ}_{se} (*)())NULL)(), \\'. \ + format(typ=typ, se=se) for typ in common.types \ + for se in common.simds_deps[simd_ext]]) + ret += '\ndefault: NULL)' + + ret += '\n\n' + ret += '#define nsimd_c11_packl(var) _Generic((var), \\\n' + ret += '\n'.join([ + 'nsimd_pack_{typ}_{se}: ' \ + '((nsimd_packl_{typ}_{se} (*)())NULL)(), \\'. \ + format(typ=typ, se=se) for typ in common.types \ + for se in common.simds_deps[simd_ext]]) + ret += '\ndefault: NULL)' + + ret += '\n\n' + ret += '#define nsimd_c11_packx2(var) _Generic((var), \\\n' + ret += '\n'.join([ + 'nsimd_pack_{typ}_{se}: ' \ + '((nsimd_packx2_{typ}_{se} (*)())NULL)(), \\'. 
\ + format(typ=typ, se=se) for typ in common.types \ + for se in common.simds_deps[simd_ext]]) + ret += '\ndefault: NULL)' + + return ret + +# ----------------------------------------------------------------------------- +# Construct C11 overloads + +def get_c11_overloads(op, simd_ext): + if common.get_first_discriminating_type(op.params) == -1: + # Only the len operator should go here + assert op.name == 'len' + ret = '\n\n'.join([ + '''#define NSIMD_C11_LEN_nsimd_pack_{typ}_{se}() \\ + nsimd_len_{se}_{typ}() + + #define NSIMD_C11_LEN_nsimd_packl_{typ}_{se}() \\ + nsimd_len_{se}_{typ}() + + #define NSIMD_C11_LEN_nsimd_packx2_{typ}_{se}() \\ + (2 * nsimd_len_{se}_{typ}()) + + #define NSIMD_C11_LEN_nsimd_packx3_{typ}_{se}() \\ + (3 * nsimd_len_{se}_{typ}()) + + #define NSIMD_C11_LEN_nsimd_packx4_{typ}_{se}() \\ + (4 * nsimd_len_{se}_{typ}())'''.format(typ=typ, se=se) \ + for typ in op.types for se in common.simds_deps[simd_ext]]) + + ret += '\n\n' + ret += '\n\n'.join([ + '''#define NSIMD_C11_LEN_nsimd_pack_{typ}() \\ + nsimd_len_{simd_ext}_{typ}() + + #define NSIMD_C11_LEN_nsimd_packl_{typ}() \\ + nsimd_len_{simd_ext}_{typ}() + + #define NSIMD_C11_LEN_nsimd_packx2_{typ}() \\ + (2 * nsimd_len_{simd_ext}_{typ}()) + + #define NSIMD_C11_LEN_nsimd_packx3_{typ}() \\ + (3 * nsimd_len_{simd_ext}_{typ}()) + + #define NSIMD_C11_LEN_nsimd_packx4_{typ}() \\ + (4 * nsimd_len_{simd_ext}_{typ}())'''. \ + format(typ=typ, simd_ext=simd_ext) for typ in common.types]) + ret += '\n\n' + ret += '#define nsimd_len(type) \\\n' \ + 'NSIMD_PP_CAT_2(NSIMD_C11_LEN_, type)()\n\n' + return ret + + def get_c11_arg(param, name): + if param in ['*', 'c*', 's', 'p']: + return name + elif param in ['v', 'l', 'vi']: + return '({}).v'.format(name) + + args = op.params[1:] + i0 = common.get_first_discriminating_type(args) + if i0 == -1: + if op.params[0] == 'v': + pack = 'pack' + elif op.params[0] == 'l': + pack = 'packl' + elif op.params[0] == 'vx2': + pack = 'packx2' + elif op.params[0] == 'vx3': + pack = 'packx3' + elif op.params[0] == 'vx4': + pack = 'packx4' + macro_args = ', '.join(['a{}'.format(i) for i in range(len(args))]) + ret = '\n\n'.join([ + '''#define NSIMD_C11_{OP_NAME}_nsimd_{pack}_{typ}_{se}({macro_args}) \\ + nsimd_make_{pack}_{typ}_{se}( \\ + nsimd_{op_name}_{se}_{typ}({macro_args}))'''. \ + format(OP_NAME=op.name.upper(), se=se, + macro_args=macro_args, + op_name=op.name, typ=typ, pack=pack) \ + for typ in op.types \ + for se in common.simds_deps[simd_ext]]) + ret += '\n\n' + ret += '\n\n'.join([ + '''#define NSIMD_C11_{OP_NAME}_nsimd_{pack}_{typ}({macro_args}) \\ + nsimd_make_{pack}_{typ}_{simd_ext}( \\ + nsimd_{op_name}_{simd_ext}_{typ}({macro_args}))'''. \ + format(OP_NAME=op.name.upper(), simd_ext=simd_ext, + macro_args=macro_args, op_name=op.name, typ=typ, + pack=pack) for typ in op.types]) + ret += '\n\n' + type_args = ', '.join(['type'] + \ + ['a{}'.format(i) for i in range(len(args))]) + call_args = ', '.join([get_c11_arg(args[i], 'a{}'.format(i)) \ + for i in range(len(args))]) + ret += '\n\n#define nsimd_{op_name}({type_args})' \ + ' NSIMD_PP_CAT_2(NSIMD_C11_{OP_NAME}_, type)({call_args})'. \ + format(op_name=op.name, OP_NAME=op.name.upper(), + call_args=call_args, type_args=type_args) + return ret + + # Getting here means that i0 >= 0 i.e. 
that overloads can be determined + # by argument i0 of the operator which is in ['v', 'l', 'vx2', 'vx3', + # 'vx4'] + + macro_args = ['a{}'.format(i) for i in range(len(args))] + call_args = ', '.join([get_c11_arg(args[i], 'a{}'.format(i)) \ + for i in range(len(args))]) + if not op.closed: + macro_args = ['to_type'] + macro_args + macro_args = ', '.join(macro_args) + + if op.params[0] in ['v', 'l', 'vx2', 'vx3', 'vx4']: + if not op.closed: + ret = '#define nsimd_{}({}) ' \ + 'nsimd_make_pack((((to_type (*)())NULL)()), ' \ + '_Generic(({}), \\\n'. \ + format(op.name, macro_args, 'a{}'.format(i0)) + else: + if op.params[0] != args[i0]: + if op.params[0] == 'v': + ctrl_expr = 'nsimd_c11_pack(a{})'.format(i0) + elif op.params[0] == 'l': + ctrl_expr = 'nsimd_c11_packl(a{})'.format(i0) + elif op.params[0] == 'vx2': + ctrl_expr = 'nsimd_c11_packx2(a{})'.format(i0) + else: + ctrl_expr = 'a{}'.format(i0) + ret = '#define nsimd_{}({}) ' \ + 'nsimd_make_pack({}, _Generic(({}), \\\n'. \ + format(op.name, macro_args, ctrl_expr, 'a{}'.format(i0)) + else: + ret = '#define nsimd_{}({}) _Generic(({}), \\\n'. \ + format(op.name, macro_args, 'a{}'.format(i0)) + + suf = { 'v': '', 'l': 'l', 'vx2': 'x2', 'vx3': 'x3', 'vx4': 'x4'} + + arg = args[i0] + typ_fmt = 'nsimd_pack{}_{{}}_{{}}'.format(suf[arg]) + + for se in common.simds_deps[simd_ext]: + for typ in op.types: + ret += typ_fmt.format(typ, se) + ': ' + if op.closed: + ret += 'nsimd_{}_{}_{}, \\\n'.format(op.name, se, typ) + continue + ret += '_Generic(((to_type (*)())NULL)(), \\\n' + for to_typ in common.get_output_types(typ, op.output_to): + to_pack = 'nsimd_pack{}_{}_{}'. \ + format(suf[op.params[0]], to_typ, se) + ret += ' {}: nsimd_{}_{}_{}_{}, \\\n'. \ + format(to_pack, op.name, se, to_typ, typ) + ret += ' default: nsimd_c11_type_unsupported), \\\n' + + ret += 'default: nsimd_c11_type_unsupported)({})'.format(call_args) + if op.params[0] in ['v', 'l', 'vx2', 'vx3', 'vx4']: + ret += ')' + return ret + +# ----------------------------------------------------------------------------- + +def doit(opts): + common.myprint(opts, 'Generating advanced C API (requires C11)') + filename = os.path.join(opts.include_dir, 'c_adv_api_functions.h') + if not common.can_create_filename(opts, filename): + return + with common.open_utf8(opts, filename) as out: + out.write('''#ifndef NSIMD_C_ADV_API_FUNCTIONS_H + #define NSIMD_C_ADV_API_FUNCTIONS_H + + #include + + ''') + + for simd_ext in common.simds: + out.write('''{hbar} + {hbar} + {hbar} + + /* {SIMD_EXT} */ + + {hbar} + {hbar} + {hbar} + + #ifdef NSIMD_{SIMD_EXT} + + {types} + + '''.format(hbar=common.hbar, + types=get_c11_types(simd_ext), + SIMD_EXT=simd_ext.upper())) + + for op_name, operator in operators.operators.items(): + out.write('/* {} */\n\n{}\n\n'. 
\ + format(op_name, get_c11_overloads(operator, + simd_ext))) + + out.write('\n\n#endif') + + out.write('\n\n{}\n\n#endif\n'.format(common.hbar)) + diff --git a/egg/gen_advanced_api.py b/egg/gen_adv_cxx_api.py similarity index 98% rename from egg/gen_advanced_api.py rename to egg/gen_adv_cxx_api.py index 4ed77a02..0f0f2754 100644 --- a/egg/gen_advanced_api.py +++ b/egg/gen_adv_cxx_api.py @@ -182,9 +182,9 @@ def doit(opts): out.write('''#ifndef NSIMD_CXX_ADV_API_FUNCTIONS_HPP #define NSIMD_CXX_ADV_API_FUNCTIONS_HPP - namespace nsimd {{ + namespace nsimd { - '''.format(year=date.today().year)) + ''') for op_name, operator in operators.operators.items(): if not operator.autogen_cxx_adv: diff --git a/egg/gen_archis.py b/egg/gen_archis.py index c34de381..4fab3a77 100644 --- a/egg/gen_archis.py +++ b/egg/gen_archis.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 Agenium Scale +# Copyright (c) 2021 Agenium Scale # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,6 +20,7 @@ import operators import common +import gen_adv_c_api import os from datetime import date import sys @@ -27,6 +28,128 @@ # ----------------------------------------------------------------------------- # Generate code for output +def get_simd_implementation_src(operator, simd_ext, from_typ, fmtspec): + if simd_ext == 'cpu': + vlen = common.CPU_NBITS // int(from_typ[1:]) + vasi = [] + params = operator.params[1:] + for i in range(len(params)): + if params[i] in ['v', 'l', 'vi']: + vasi.append('a{}.v{{i}}'.format(i)) + else: + vasi.append('a{}'.format(i)) + vasi = ', '.join(vasi) + typ2 = 'f32' if from_typ == 'f16' else from_typ + if operator.params[0] == '_': + body = '\n'.join( + ['nsimd_scalar_{op_name}_{typ2}({vasi});'. \ + format(op_name=operator.name, typ2=typ2, + vasi=vasi.format(i=i)) for i in range(vlen)]) + else: + body = 'nsimd_cpu_v{} ret;\n'.format(from_typ) + body += '\n'.join( + ['ret.v{i} = nsimd_scalar_{op_name}_{typ2}({vasi});'. \ + format(i=i, op_name=operator.name, typ2=typ2, + vasi=vasi.format(i=i)) for i in range(vlen)]) + body += '\nreturn ret;\n' + return \ + '''{hbar} + + NSIMD_INLINE {return_typ} NSIMD_VECTORCALL + nsimd_{name}_{simd_ext}_{suf}({c_args}) {{ + {body} + }} + + #if NSIMD_CXX > 0 + namespace nsimd {{ + NSIMD_INLINE {return_typ} NSIMD_VECTORCALL + {name}({cxx_args}) {{ + {body} + }} + }} // namespace nsimd + #endif + + '''.format(body=body, **fmtspec) + if from_typ == 'f16': + n = len(operator.params[1:]) + f16_to_f32 = '\n'.join( + ['nsimd_{simd_ext}_vf32x2 buf{i}' \ + ' = nsimd_upcvt_{simd_ext}_f32_f16({args});'. \ + format(i=i, args=common.get_arg(i), **fmtspec) \ + for i in range(n)]) + bufsv0 = ', '.join(['buf{}.v0'.format(i) for i in range(n)]) + bufsv1 = ', '.join(['buf{}.v1'.format(i) for i in range(n)]) + if operator.params[0] != '_': + retv0 = 'nsimd_{simd_ext}_vf32 retv0 = '.format(**fmtspec) + retv1 = 'nsimd_{simd_ext}_vf32 retv1 = '.format(**fmtspec) + f32_to_f16 = \ + 'return nsimd_downcvt_{simd_ext}_f16_f32(retv0, retv1);'. \ + format(**fmtspec) + else: + retv0 = '' + retv1 = '' + f32_to_f16 = '' + retv0 += '{sleef_symbol_prefix}_{simd_ext}_f32({bufsv0});'. \ + format(bufsv0=bufsv0, **fmtspec) + retv1 += '{sleef_symbol_prefix}_{simd_ext}_f32({bufsv1});'. 
\ + format(bufsv1=bufsv1, **fmtspec) + return \ + '''{hbar} + + NSIMD_INLINE {return_typ} NSIMD_VECTORCALL + nsimd_{name}_{simd_ext}_{suf}({c_args}) {{ + {f16_to_f32} + {retv0} + {retv1} + {f32_to_f16}}} + + #if NSIMD_CXX > 0 + namespace nsimd {{ + NSIMD_INLINE {return_typ} NSIMD_VECTORCALL + {name}({cxx_args}) {{ + {f16_to_f32} + {retv0} + {retv1} + {f32_to_f16}}} + }} // namespace nsimd + #endif + + '''.format(f16_to_f32=f16_to_f32, retv0=retv0, retv1=retv1, + f32_to_f16=f32_to_f16, **fmtspec) + else: + return \ + '''{hbar} + + #if NSIMD_CXX > 0 + extern "C" {{ + #endif + + NSIMD_DLLSPEC {return_typ} NSIMD_VECTORCALL + {sleef_symbol_prefix}_{simd_ext}_{suf}({c_args}); + + #if NSIMD_CXX > 0 + }} // extern "C" + #endif + + NSIMD_INLINE {return_typ} NSIMD_VECTORCALL + nsimd_{name}_{simd_ext}_{suf}({c_args}) {{ + {returns}{sleef_symbol_prefix}_{simd_ext}_{suf}({vas}); + }} + + #if NSIMD_CXX > 0 + namespace nsimd {{ + NSIMD_INLINE {return_typ} NSIMD_VECTORCALL + {name}({cxx_args}) {{ + {returns}{sleef_symbol_prefix}_{simd_ext}_{suf}({vas}); + }} + }} // namespace nsimd + #endif + + '''.format(**fmtspec) + +# ----------------------------------------------------------------------------- +# Generate code for output + def get_simd_implementation(opts, operator, mod, simd_ext): typ_pairs = [] for t in operator.types: @@ -55,31 +178,8 @@ def get_simd_implementation(opts, operator, mod, simd_ext): to_typ = pair[1] fmtspec = operator.get_fmtspec(from_typ, to_typ, simd_ext) if operator.src: - ret += \ - '''{hbar} - - #if NSIMD_CXX > 0 - extern "C" {{ - #endif - - NSIMD_DLLSPEC - {return_typ} NSIMD_VECTORCALL - nsimd_{name}_{simd_ext}_{suf}({c_args}); - - #if NSIMD_CXX > 0 - }} // extern "C" - #endif - - #if NSIMD_CXX > 0 - namespace nsimd {{ - NSIMD_INLINE {return_typ} NSIMD_VECTORCALL - {name}({cxx_args}) {{ - {returns}nsimd_{name}_{simd_ext}_{suf}({vas}); - }} - }} // namespace nsimd - #endif - - '''.format(**fmtspec) + ret += get_simd_implementation_src(operator, simd_ext, from_typ, + fmtspec) else: ret += \ '''{hbar} @@ -179,7 +279,6 @@ def gen_archis_write_put(opts, platform, simd_ext, simd_dir): out.write('#endif') common.clang_format(opts, filename) - # ----------------------------------------------------------------------------- # Generate code for architectures @@ -188,28 +287,32 @@ def gen_archis_write_file(opts, op, platform, simd_ext, simd_dir): if not common.can_create_filename(opts, filename): return mod = opts.platforms[platform] + additional_include = mod.get_additional_include(op.name, platform, + simd_ext) + if op.src: + additional_include += \ + '''#include + #include + '''.format(platform=platform, simd_ext=simd_ext) with common.open_utf8(opts, filename) as out: - out.write('''#ifndef {guard} - #define {guard} + out.write( + '''#ifndef {guard} + #define {guard} - #include - {additional_include} + #include + {additional_include} - {code} + {code} - {hbar} + {hbar} - #endif - '''.format(additional_include=mod.get_additional_include( - op.name, platform, - simd_ext), - year=date.today().year, - guard=op.get_header_guard(platform, simd_ext), - platform=platform, - simd_ext=simd_ext, - func=op.name, hbar=common.hbar, - code=get_simd_implementation(opts, op, mod, - simd_ext))) + #endif + '''.format(additional_include=additional_include, + year=date.today().year, + guard=op.get_header_guard(platform, simd_ext), + platform=platform, simd_ext=simd_ext, + func=op.name, hbar=common.hbar, + code=get_simd_implementation(opts, op, mod, simd_ext))) common.clang_format(opts, filename) def 
gen_archis_simd(opts, platform, simd_ext, simd_dir): @@ -235,30 +338,32 @@ def gen_archis_types(opts, simd_dir, platform, simd_ext): 'nsimd_{}_v{}x{}'.format(simd_ext, typ, deg)) \ for typ in common.types]) else: - c_code += '\n'.join([''' - typedef NSIMD_STRUCT nsimd_{simd_ext}_v{typ}x2 {{ - nsimd_{simd_ext}_v{typ} v0; - nsimd_{simd_ext}_v{typ} v1; - }} nsimd_{simd_ext}_v{typ}x2; - '''.format(simd_ext=simd_ext, typ=typ) \ - for typ in common.types]) - c_code += '\n'.join([''' - typedef NSIMD_STRUCT nsimd_{simd_ext}_v{typ}x3 {{ - nsimd_{simd_ext}_v{typ} v0; - nsimd_{simd_ext}_v{typ} v1; - nsimd_{simd_ext}_v{typ} v2; - }} nsimd_{simd_ext}_v{typ}x3; - '''.format(simd_ext=simd_ext, typ=typ) \ - for typ in common.types]) - c_code += '\n'.join([''' - typedef NSIMD_STRUCT nsimd_{simd_ext}_v{typ}x4 {{ - nsimd_{simd_ext}_v{typ} v0; - nsimd_{simd_ext}_v{typ} v1; - nsimd_{simd_ext}_v{typ} v2; - nsimd_{simd_ext}_v{typ} v3; - }} nsimd_{simd_ext}_v{typ}x4; - '''.format(simd_ext=simd_ext, typ=typ) \ - for typ in common.types]) + c_code += '\n'.join([ + ''' + typedef NSIMD_STRUCT nsimd_{simd_ext}_v{typ}x2 {{ + nsimd_{simd_ext}_v{typ} v0; + nsimd_{simd_ext}_v{typ} v1; + }} nsimd_{simd_ext}_v{typ}x2; + '''.format(simd_ext=simd_ext, typ=typ) for typ in common.types]) + + c_code += '\n'.join([ + ''' + typedef NSIMD_STRUCT nsimd_{simd_ext}_v{typ}x3 {{ + nsimd_{simd_ext}_v{typ} v0; + nsimd_{simd_ext}_v{typ} v1; + nsimd_{simd_ext}_v{typ} v2; + }} nsimd_{simd_ext}_v{typ}x3; + '''.format(simd_ext=simd_ext, typ=typ) for typ in common.types]) + + c_code += '\n'.join([ + ''' + typedef NSIMD_STRUCT nsimd_{simd_ext}_v{typ}x4 {{ + nsimd_{simd_ext}_v{typ} v0; + nsimd_{simd_ext}_v{typ} v1; + nsimd_{simd_ext}_v{typ} v2; + nsimd_{simd_ext}_v{typ} v3; + }} nsimd_{simd_ext}_v{typ}x4; + '''.format(simd_ext=simd_ext, typ=typ) for typ in common.types]) c_code += '\n\n' cxx_code = \ '\n\n'.join(['''template <> @@ -290,12 +395,10 @@ def gen_archis_types(opts, simd_dir, platform, simd_ext): #endif #endif - '''.\ - format(year=date.today().year, - platform=platform.upper(), - SIMD_EXT=simd_ext.upper(), + '''. \ + format(year=date.today().year, platform=platform.upper(), + SIMD_EXT=simd_ext.upper(), simd_ext=simd_ext, c_code=c_code, cxx_code=cxx_code, - simd_ext=simd_ext, nb_registers=mod.get_nb_registers(simd_ext))) common.clang_format(opts, filename) diff --git a/egg/gen_doc.py b/egg/gen_doc.py index 4ddc8563..0671525a 100644 --- a/egg/gen_doc.py +++ b/egg/gen_doc.py @@ -1,7 +1,7 @@ # Use utf-8 encoding # -*- coding: utf-8 -*- -# Copyright (c) 2019 Agenium Scale +# Copyright (c) 2021 Agenium Scale # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -54,9 +54,9 @@ def gen_overview(opts): ## NSIMD scalar types -Their names follows the following pattern: `Sxx` where +Their names follow the following pattern: `Sxx` where -- `S` is `i` for signed integers, `u` for unsigned integer and `f` for +- `S` is `i` for signed integers, `u` for unsigned integer or `f` for floatting point number. - `xx` is the number of bits taken to represent the number. @@ -66,29 +66,40 @@ def gen_overview(opts): for t in common.types: fout.write('- `{}`\n'.format(t)) fout.write(''' +## NSIMD generic SIMD vector types -## NSIMD SIMD vector types +In NSIMD, we call a platform an architecture e.g. Intel, ARM, POWERPC. We call +SIMD extension a set of low-level functions and types provided by hardware +vendors to access SIMD units. 
Examples include SSE2, SSE42, AVX, ... When
+compiling, the generic SIMD vector types represent a SIMD register of the
+target. Examples are a `__m128` for Intel SSE, `__m512` for Intel AVX-512 or
+`svfloat32_t` for Arm SVE.
 
-Their names follows the following pattern: `vSCALAR` where
-`SCALAR` is a one of scalar type listed above. For example `vi8` means a SIMD
-vector containing `i8`'s.
+Their names follow the following patterns:
+
+- C base API: `vSCALAR` where `SCALAR` is one of the scalar types listed above.
+- C advanced API: `nsimd_pack_SCALAR` where `SCALAR` is one of the scalar
+  types listed above.
+- C++ advanced API: `nsimd::pack<SCALAR>` where `SCALAR` is one of the scalar
+  types listed above.
 
 Full list of SIMD vector types:
+
+| Base type | C base API | C advanced API | C++ advanced API |
+|-----------|------------|----------------|------------------|
 ''')
-    for t in common.types:
-        fout.write('- `v{}`\n'.format(t))
+
+    fout.write('\n'.join([
+        '| `{typ}` | `v{typ}` | `nsimd_pack_{typ}` | `nsimd::pack<{typ}>` |'. \
+        format(typ=typ) for typ in common.types]))
+
     fout.write('''
 ## C/C++ base APIs
 
 These come automatically when you include `nsimd/nsimd.h`. You do *not* need
-to include a header file for having a function. In NSIMD, we call a platform
-an architecture e.g. Intel, ARM, POWERPC. We call SIMD extension a set of
-low-level functions and types provided to access a given SIDM extension.
-Examples include SSE2, SSE42, AVX, ...
-
-Here is a list of supported platforms and their corresponding SIMD extensions.
+to include a header file for having a function. Here is a list of supported
+platforms and their corresponding SIMD extensions.
 ''')
 
     platforms = common.get_platforms(opts)
@@ -98,27 +109,26 @@ def gen_overview(opts):
         fout.write('  - `{}`\n'.format(s))
     fout.write('''
 Each simd extension has its own set of SIMD types and functions. Types follow
-the following pattern: `nsimd_SIMDEXT_vSCALAR` where
+the pattern: `nsimd_SIMDEXT_vSCALAR` where
 
 - `SIMDEXT` is the SIMD extensions.
 - `SCALAR` is one of scalar types listed above.
 
 There are also logical types associated to each SIMD vector type. These types
-are used to represent the result of a comparison of SIMD vectors. They are
-usually bit masks. Their name follow the following pattern:
+are used, for example, to represent the result of a comparison of SIMD vectors.
+They are usually bit masks. Their names follow the pattern:
 `nsimd_SIMDEXT_vlSCALAR` where
 
 - `SIMDEXT` is the SIMD extensions.
 - `SCALAR` is one of scalar types listed above.
 
-Note 1: Platform `cpu` is scalar fallback when no SIMD extension has been
-specified.
+Note 1: Platform `cpu` is a 128-bit SIMD emulation fallback when no SIMD
+extension has been specified or is supported on a given compilation target.
 
 Note 2: as all SIMD extensions of all platforms are different there is no need
 to put the name of the platform in each identifier.
 
-Function names follow the following pattern: `nsimd_SIMDEXT_FUNCNAME_SCALAR`
-where
+Function names follow the pattern: `nsimd_SIMDEXT_FUNCNAME_SCALAR` where
 
 - `SIMDEXT` is the SIMD extensions.
 - `FUNCNAME` is the name of a function e.g. `add` or `sub`.
@@ -126,20 +136,20 @@ def gen_overview(opts):
 ### Generic identifier
 
-In C, genericity is achieved using macros.
+In the base C API, genericity is achieved using macros.
 
-- `vec(SCALAR)` represents the SIMD vector type containing SCALAR elements.
-  SCALAR must be one of scalar types listed above.
-- `vecl(SCALAR)` represents the SIMD vector of logicals type containing SCALAR
+- `vec(SCALAR)` is a type to represent a SIMD vector containing SCALAR
+  elements. SCALAR must be one of scalar types listed above.
+- `vecl(SCALAR)` is a type to represent a SIMD vector of logicals for SCALAR
   elements. SCALAR must be one of scalar types listed above.
-- `vec_e(SCALAR)` represents the SIMD vector type containing SCALAR elements.
-  SCALAR must be one of scalar types listed above.
-- `vecl_e(SCALAR)` represents the SIMD vector of logicals type containing
-  SCALAR elements. SCALAR must be one of scalar types listed above.
-- `vFUNCNAME` is the macro name to access the function FUNCNAME e.g. `vadd`,
-  `vsub`.
-- `vFUNCNAME_e` is the macro name to access the function FUNCNAME e.g.
-  `vadd_e`, `vsub_e`.
+- `vec_a(SCALAR, SIMDEXT)` is a type to represent a SIMD vector containing
+  SCALAR elements for the simd extension SIMDEXT. SCALAR must be one of scalar
+  types listed above and SIMDEXT must be a valid SIMD extension.
+- `vecl_a(SCALAR, SIMDEXT)` is a type to represent a SIMD vector of logicals
+  for SCALAR elements for the simd extension SIMDEXT. SCALAR must be one of
+  scalar types listed above and SIMDEXT must be a valid SIMD extension.
+- `vFUNCNAME` takes as input the above types to access the operator FUNCNAME
+  e.g. `vadd`, `vsub`.
 
 In C++98 and C++03, type traits are available.
@@ -158,12 +168,17 @@ def gen_overview(opts):
 - `nsimd::vectorl` is a typedef to `nsimd::simd_traits::vectorl`.
 
+The C++20 API does not bring different types for SIMD registers nor any other
+way to access the SIMD types. It only brings concepts instead of usual
+`typename`s. For more information cf. .
+
 Note that all macro and functions available in plain C are still available in
 C++.
 
-### List of functions available for manipulation of SIMD vectors
+### List of operators provided by the base APIs
 
-For each FUNCNAME a C function (also available in C++)
+In the documentation we use interchangeably the terms "function" and
+"operator". For each operator FUNCNAME a C function (also available in C++)
 named `nsimd_SIMDEXT_FUNCNAME_SCALAR` is available for each SCALAR type unless
 specified otherwise.
@@ -180,11 +195,13 @@ def gen_overview(opts):
 For example, for the addition of two SIMD vectors `a` and `b` here are the
 possibilities:
 
-    c = nsimd_add_avx_f32(a, b);        // use AVX
-    c = nsimd::add(a, b, f32());        // use detected SIMDEXT
-    c = nsimd::add(a, b, f32(), avx()); // force AVX even if detected SIMDEXT is not AVX
-    c = vadd(a, b, f32);                // use detected SIMDEXT
-    c = vadd_e(a, b, f32, avx);         // force AVX even if detected SIMDEXT is not AVX
+```c++
+c = nsimd_add_avx_f32(a, b);        // use AVX
+c = nsimd::add(a, b, f32());        // use detected SIMDEXT
+c = nsimd::add(a, b, f32(), avx()); // force AVX even if detected SIMDEXT is not AVX
+c = vadd(a, b, f32);                // use detected SIMDEXT
+c = vadd_e(a, b, f32, avx);         // force AVX even if detected SIMDEXT is not AVX
+```
 
 Here is a list of available FUNCNAME.
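Before that list, a short self-contained sketch of the base C API in a loop. It is hedged: the `vFUNCNAME` macro argument orders are assumed to mirror the `vadd` calls above (data arguments first, scalar type last), and `sum_f32` is our own helper name:

```c
#include <nsimd/nsimd.h>

/* Adds two f32 buffers with the generic-identifier macros. */
void sum_f32(f32 *dst, const f32 *a, const f32 *b, int n) {
  int i, step = vlen(f32);
  for (i = 0; i + step <= n; i += step) {
    vec(f32) va = vloadu(&a[i], f32);
    vec(f32) vb = vloadu(&b[i], f32);
    vstoreu(&dst[i], vadd(va, vb, f32), f32);
    /* or pin the extension: vadd_e(va, vb, f32, avx) */
  }
}
```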
@@ -196,23 +213,48 @@ def gen_overview(opts):
         args = ', '.join([common.get_one_type_generic(p, 'SCALAR') + \
                           ' a' + str(count) for count, p in \
                           enumerate(operator.params[1:])])
-        fout.write('- `{} {}({});`\n'.format(return_typ, func, args))
-
-        if operator.domain and len(operator.params[1:]) > 0:
-            params = operator.params[1:]
-
-            if len(params) == 1:
-                fout.write('  a0 ∈ {}\n'.format(operator.domain))
-            else:
-                param = ', '.join(['a' + str(count) for count in \
-                                   range(len(params))])
-                fout.write('  ({}) ∈ {}\n'.format(param, operator.domain))
-
+        fout.write('- `{} {}({});`  \n'.format(return_typ, func, args))
         if len(operator.types) < len(common.types):
             typs = ', '.join(['{}'.format(t) for t in operator.types])
             fout.write('  Only available for {}\n'.format(typs))
+    fout.write('''
+## C advanced API (only available in C11)
+
+The C advanced API takes advantage of the C11 `_Generic` keyword to provide
+function overloading. Unlike the base API described above there is no need to
+pass as arguments the base type of the SIMD extension. The information is
+contained in the types provided by this API.
+
+- `nsimd_pack_SCALAR_SIMDEXT` represents a SIMD vector containing
+  SCALAR elements of SIMD extension SIMDEXT.
+- `nsimd_packl_SCALAR_SIMDEXT` represents a SIMD vector of logicals
+  for SCALAR elements of SIMD extension SIMDEXT.
+
+There are versions of the above types without SIMDEXT for which the targeted
+SIMD extension is automatically chosen.
+
+- `nsimd_pack_SCALAR` represents a SIMD vector containing SCALAR elements.
+- `nsimd_packl_SCALAR` represents a SIMD vector of logicals for SCALAR
+  elements.
+
+Generic types are also available:
+
+- `nsimd_pack(SCALAR)` is a type to represent a SIMD vector containing SCALAR
+  elements. SCALAR must be one of scalar types listed above.
+- `nsimd_packl(SCALAR)` is a type to represent a SIMD vector of logicals for
+  SCALAR elements. SCALAR must be one of scalar types listed above.
+- `nsimd_pack_a(SCALAR, SIMDEXT)` is a type to represent a SIMD vector
+  containing SCALAR elements for the simd extension SIMDEXT. SCALAR must be one
+  of scalar types listed above and SIMDEXT must be a valid SIMD extension.
+- `nsimd_packl_a(SCALAR, SIMDEXT)` is a type to represent a SIMD vector of
+  logicals for SCALAR elements for the simd extension SIMDEXT. SCALAR must be
+  one of scalar types listed above and SIMDEXT must be a valid SIMD extension.
+
+Finally, operators follow the naming: `nsimd_FUNCNAME` e.g. `nsimd_add`,
+`nsimd_sub`.
+
 ## C++ advanced API
 
 The C++ advanced API is called advanced not because it requires C++11 or above
@@ -239,38 +281,9 @@ def gen_overview(opts):
 otherwise e.g. the load family of functions. It is impossible to determine
 the kind of pack (unroll and SIMDEXT) from the type of a pointer. Therefore in
 this case, the last argument must be a pack and this same type will then
-return. Also some functions are available as C++ operators.
-
-Here is the list of functions that act on packs.
-
+return. Also some functions are available as C++ operators. They follow the
+naming: `nsimd::FUNCNAME`.
''') - for op_name, operator in operators.items(): - return_typ = common.get_one_type_pack(operator.params[0], 1, 'N') - func = operator.name - args = ', '.join([common.get_one_type_pack(p, 0, 'N') + ' a' + \ - str(count) for count, p in \ - enumerate(operator.params[1:])]) - if 'v' not in operator.params[1:] and 'l' not in operator.params[1:]: - args = args + ', pack const&' if args != '' \ - else 'pack const&' - fout.write('- `{} {}({});`\n'.format(return_typ, func, args)) - - if operator.domain and len(operator.params[1:]) > 0: - params = operator.params[1:] - if len(params) == 1: - fout.write(' a0 ∈ {}\n'.format(operator.domain)) - else: - param = ', '.join(['a'+str(count) for count in \ - range(len(params))]) - fout.write(' ({}) ∈ {}\n'.format(param, operator.domain)) - - if operator.cxx_operator: - fout.write(' Available as `{}`\n'. \ - format('operator'+operator.cxx_operator)) - - if len(operator.types) < len(common.types): - typs = ', '.join(['{}'.format(t) for t in operator.types]) - fout.write(' Only available for {}\n'.format(typs)) # ----------------------------------------------------------------------------- @@ -337,6 +350,10 @@ def to_string(var): fout.write('```c\n') fout.write(to_string(operator.get_generic_signature('c_base'))) fout.write('\n```\n\n') + fout.write('\n\n## C advanced API (generic, requires C11)\n\n') + fout.write('```c\n') + fout.write(to_string(operator.get_generic_signature('c_adv'))) + fout.write('\n```\n\n') fout.write('## C++ base API (generic)\n\n') fout.write('```c++\n') fout.write(to_string(operator.get_generic_signature('cxx_base'))) @@ -431,8 +448,8 @@ def gen_what_is_wrapped(opts): common.in1, common.in2, common.in3, common.in4, common.in5) - # For now we only list Intel and Arm intrinsics - simd_exts = common.x86_simds + common.arm_simds + # For now we only list Intel, Arm and POWERPC intrinsics + simd_exts = common.x86_simds + common.arm_simds + common.ppc_simds for p in common.get_platforms(opts): index_simds = '' for simd_ext in opts.platforms_list[p].get_simd_exts(): @@ -444,6 +461,8 @@ def gen_what_is_wrapped(opts): format(simd_ext.upper(), simd_ext) ops = [[], [], [], []] for op_name, operator in operators.items(): + if operator.src: + continue c_src = os.path.join(opts.include_dir, p, simd_ext, '{}.h'.format(op_name)) ops[operator.output_to].append('{} "{}"'. 
\ @@ -554,8 +573,7 @@ def get_html_file(opts, name, module=''): - + diff --git a/egg/gen_scalar_utilities.py b/egg/gen_scalar_utilities.py index a54c843e..a61ee517 100644 --- a/egg/gen_scalar_utilities.py +++ b/egg/gen_scalar_utilities.py @@ -24,10 +24,11 @@ import scalar import cuda import rocm +import oneapi # ----------------------------------------------------------------------------- -def get_gpu_impl(gpu_sig, cuda_impl, rocm_impl): +def get_gpu_impl(gpu_sig, cuda_impl, rocm_impl, oneapi_sig, oneapi_impl): if cuda_impl == rocm_impl: return '''#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) @@ -35,7 +36,15 @@ def get_gpu_impl(gpu_sig, cuda_impl, rocm_impl): {cuda_impl} }} - #endif'''.format(gpu_sig=gpu_sig, cuda_impl=cuda_impl) + #elif defined(NSIMD_ONEAPI) + + inline {oneapi_sig} {{ + {oneapi_impl} + }} + + #endif'''.format(gpu_sig=gpu_sig, cuda_impl=cuda_impl, + oneapi_sig=oneapi_sig, + oneapi_impl=oneapi_impl) else: return '''#if defined(NSIMD_CUDA) @@ -43,16 +52,21 @@ def get_gpu_impl(gpu_sig, cuda_impl, rocm_impl): {cuda_impl} }} - #endif - - #ifdef NSIMD_ROCM + #elif defined(NSIMD_ROCM) inline {gpu_sig} {{ {rocm_impl} }} + #elif defined(NSIMD_ONEAPI) + + inline {oneapi_sig} {{ + {oneapi_impl} + }} + #endif'''.format(gpu_sig=gpu_sig, cuda_impl=cuda_impl, - rocm_impl=rocm_impl) + rocm_impl=rocm_impl, oneapi_sig=oneapi_sig, + oneapi_impl=oneapi_impl) # ----------------------------------------------------------------------------- @@ -65,6 +79,7 @@ def doit(opts): # we declare reinterprets now as we need them scalar_tmp = [] gpu_tmp = [] + oneapi_tmp = [] for t in operators.Reinterpret.types: for tt in common.get_output_types( t, operators.Reinterpret.output_to): @@ -72,10 +87,23 @@ def doit(opts): get_scalar_signature('cpu', t, tt, 'c')] gpu_tmp += [operators.Reinterpret(). \ get_scalar_signature('gpu', t, tt, 'cxx')] + oneapi_tmp += [operators.Reinterpret(). \ + get_scalar_signature('oneapi', t, tt, 'cxx')] scalar_reinterpret_decls = '\n'.join(['NSIMD_INLINE ' + sig + ';' \ for sig in scalar_tmp]) gpu_reinterpret_decls = '\n'.join(['inline ' + sig + ';' \ for sig in gpu_tmp]) + oneapi_reinterpret_decls = '\n'.join(['inline ' + sig + ';' \ + for sig in oneapi_tmp]) + sleef_decls = '' + for op in operators.operators.values(): + if 'sleef_symbol_prefix' in op.__class__.__dict__: + sleef_decls += 'f32 {}_scalar_f32({});\n'. \ + format(op.sleef_symbol_prefix, + ', '.join(['f32'] * len(op.params[1:]))) + sleef_decls += 'f64 {}_scalar_f64({});\n'. \ + format(op.sleef_symbol_prefix, + ', '.join(['f64'] * len(op.params[1:]))) out.write( '''#ifndef NSIMD_SCALAR_UTILITIES_H #define NSIMD_SCALAR_UTILITIES_H @@ -98,26 +126,51 @@ def doit(opts): #endif #endif + {hbar} + + #if NSIMD_CXX > 0 + extern "C" {{ + #endif + + {sleef_decls} + + #if NSIMD_CXX > 0 + }} // extern "C" + #endif + + {hbar} + {scalar_reinterpret_decls} - #if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) + #if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) || \ + defined(NSIMD_ONEAPI) namespace nsimd {{ + #if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) + {gpu_reinterpret_decls} + #elif defined(NSIMD_ONEAPI) + + {oneapi_reinterpret_decls} + + #endif + }} // namespace nsimd #endif '''. 
\ - format(scalar_reinterpret_decls=scalar_reinterpret_decls, - gpu_reinterpret_decls=gpu_reinterpret_decls)) + format(hbar=common.hbar, sleef_decls=sleef_decls, + scalar_reinterpret_decls=scalar_reinterpret_decls, + gpu_reinterpret_decls=gpu_reinterpret_decls, + oneapi_reinterpret_decls=oneapi_reinterpret_decls)) for op_name, operator in operators.operators.items(): if not operator.has_scalar_impl: continue if operator.params == ['l'] * len(operator.params): out.write('\n\n' + common.hbar + '\n\n') - out.write(\ + out.write( '''NSIMD_INLINE {c_sig} {{ {scalar_impl} }} @@ -144,13 +197,15 @@ def doit(opts): gpu_impl=get_gpu_impl( operator.get_scalar_signature('gpu', t, tt, 'cxx'), cuda.get_impl(operator, tt, t), - rocm_impl=rocm.get_impl(operator, tt, t)))) + rocm.get_impl(operator, tt, t), + operator.get_scalar_signature('oneapi', t, tt, 'cxx'), + oneapi.get_impl(operator, tt, t)))) continue for t in operator.types: tts = common.get_output_types(t, operator.output_to) for tt in tts: out.write('\n\n' + common.hbar + '\n\n') - out.write(\ + out.write( '''NSIMD_INLINE {c_sig} {{ {scalar_impl} }} @@ -178,7 +233,9 @@ def doit(opts): gpu_impl=get_gpu_impl( operator.get_scalar_signature('gpu', t, tt, 'cxx'), cuda.get_impl(operator, tt, t), - rocm_impl=rocm.get_impl(operator, tt, t)))) + rocm.get_impl(operator, tt, t), + operator.get_scalar_signature('oneapi', t, tt, 'cxx'), + oneapi.get_impl(operator, tt, t)))) out.write(''' diff --git a/egg/gen_src.py b/egg/gen_src.py index bc5e73b0..d6cfd35e 100644 --- a/egg/gen_src.py +++ b/egg/gen_src.py @@ -100,170 +100,6 @@ def get_put_impl(simd_ext): ret += '} // extern "C"\n' return ret -# ----------------------------------------------------------------------------- -# Implementations for all other functions - -def get_impl(operator, emulate_fp16, simd_ext): - ret = '' - for t in operator.types: - if not operator.closed: - # For now we do not support generation of non closed operators - # for the binary - raise Exception('Non closed operators not supported') - fmtspec = operator.get_fmtspec(t, t, simd_ext) - args_list = common.enum(operator.params[1:]) - args = [] - args1 = [] - args2 = [] - for a in args_list: - if a[1] == 'v': - if emulate_fp16 and t == 'f16': - # cpu is the only exception - if simd_ext == 'cpu': - n = common.CPU_NBITS // 16 // 2 - args1 += ['nsimd::pack(nsimd_cpu_vf32{' + \ - ','.join('a{}.v{}'.format(a[0], i) \ - for i in range(0, n)) + '})'] - args2 += ['nsimd::pack(nsimd_cpu_vf32{' + \ - ','.join('a{}.v{}'.format(a[0], i + n) \ - for i in range(0, n)) + '})'] - else: - args += ['nsimd::pack(a{}.v{{lohi}})'. \ - format(a[0])] - else: - args += ['nsimd::pack<{}>(a{})'.format(t, a[0])] - elif a[1] == 'l': - if emulate_fp16 and t == 'f16': - if simd_ext == 'cpu': - n = common.CPU_NBITS // 16 // 2 - args1 += ['nsimd::packl(nsimd_cpu_vlf32{' + \ - ','.join('a{}.v{}'.format(a[0], i) \ - for i in range(0, n)) + '})'] - args2 += ['nsimd::packl(nsimd_cpu_vlf32{' + \ - ','.join('a{}.v{}'.format(a[0], i + n) \ - for i in range(0, n)) + '})'] - else: - args += ['nsimd::packl(a{}.v{{lohi}})'. \ - format(a[0])] - else: - args += ['nsimd::packl<{}>(a{})'.format(t, a[0])] - else: - args += ['a{}'.format(a[0])] - args = ', '.join(args) - args1 = ', '.join(args1) - args2 = ', '.join(args2) - if emulate_fp16 and t == 'f16': - if simd_ext == 'cpu': - n = common.CPU_NBITS // 16 - lo = '\n'.join(['ret.v{} = tmp.car.v{};'.format(i, i) \ - for i in range(0, n // 2)]) - hi = '\n'.join(['ret.v{} = tmp.car.v{};'. 
\ - format(i + n // 2, i) \ - for i in range(0, n // 2)]) - ret += \ - '''{hbar} - - extern "C" {{ - - NSIMD_DLLEXPORT {return_typ} NSIMD_VECTORCALL - nsimd_{name}_cpu_{suf}({c_args}) {{ - nsimd_cpu_v{logical}f16 ret; - nsimd::pack{logical} tmp; - tmp = nsimd::impl::{name}({args1}); - {lo} - tmp = nsimd::impl::{name}({args2}); - {hi} - return ret; - }} - - }} // extern "C" - - '''.format(args1=args1, args2=args2, lo=lo, hi=hi, - logical='l' if operator.params[0] == 'l' else '', - member='.f' if operator.params[0] == 'v' \ - else '.u', **fmtspec) - else: - ret += \ - '''{hbar} - - extern "C" {{ - - NSIMD_DLLEXPORT {return_typ} NSIMD_VECTORCALL - nsimd_{name}_{simd_ext}_{suf}({c_args}) {{ - nsimd_{simd_ext}_v{logical}f16 ret; - auto buf = nsimd::impl::{name}({args1}); - ret.v0 = buf.car; - buf = nsimd::impl::{name}({args2}); - ret.v1 = buf.car; - return ret; - }} - - }} // extern "C" - - '''.format(args1=args.format(lohi='0'), - args2=args.format(lohi='1'), - logical='l' if operator.params[0] == 'l' else '', - **fmtspec) - else: - if t == 'f16': - inputs = \ - '\n'.join(['''f16 buf{i}_f16[NSIMD_MAX_LEN(f16)]; - f32 buf{i}_f32[NSIMD_MAX_LEN(f16)]; - storeu(buf{i}_f16, a{i}, f16(), {simd_ext}()); - for (int i = 0; i < len_f16; i++) {{ - buf{i}_f32[i] = (f32)buf{i}_f16[i]; - }} - '''.format(i=i, **fmtspec) for i in \ - range(0, len(args_list))]) - f32_args_lo = \ - ', '.join(['loadu>(buf{}_f32)'. \ - format(i) for i in range(0, len(args_list))]) - f32_args_hi = \ - ', '.join(['loadu>(buf{}_f32 + len_f32)'. \ - format(i) for i in range(0, len(args_list))]) - ret += \ - '''{hbar} - - extern "C" {{ - - NSIMD_DLLEXPORT {return_typ} NSIMD_VECTORCALL - nsimd_{name}_{simd_ext}_{suf}({c_args}) {{ - using namespace nsimd; - int len_f16 = len(pack()); - int len_f32 = len(pack()); - {inputs} - auto temp = nsimd::impl::{name}({f32_args_lo}); - storeu(buf0_f32, temp.car, f32(), {simd_ext}()); - temp = nsimd::impl::{name}({f32_args_hi}); - storeu(buf0_f32 + len_f32, temp.car, f32(), {simd_ext}()); - for (int i = 0; i < len_f16; i++) {{ - buf0_f16[i] = (f16)buf0_f32[i]; - }} - return loadu(buf0_f16, f16()); - }} - - }} // extern "C" - - '''.format(inputs=inputs, f32_args_lo=f32_args_lo, - f32_args_hi=f32_args_hi, **fmtspec) - else: - ret += \ - '''{hbar} - - extern "C" {{ - - NSIMD_DLLEXPORT {return_typ} NSIMD_VECTORCALL - nsimd_{name}_{simd_ext}_{suf}({c_args}) {{ - auto buf = nsimd::impl::{name}({args}); - return buf.car; - }} - - }} // extern "C" - - '''.format(args=args, **fmtspec) - return ret - - # ----------------------------------------------------------------------------- # Generate base APIs @@ -277,16 +113,7 @@ def write_cpp(opts, simd_ext, emulate_fp16): #include '''.format(year=date.today().year)) - for op_name, operator in operators.operators.items(): - if operator.src: - out.write('''{hbar} - - #include - - '''.format(name=operator.name, hbar=common.hbar)) - out.write(get_impl(operator, emulate_fp16, simd_ext)) out.write(get_put_impl(simd_ext)) - common.clang_format(opts, filename) def doit(opts): @@ -296,5 +123,4 @@ def doit(opts): for platform in opts.platforms: mod = opts.platforms[platform] for simd_ext in mod.get_simd_exts(): - write_cpp(opts, simd_ext, - mod.emulate_fp16(simd_ext)) + write_cpp(opts, simd_ext, mod.emulate_fp16(simd_ext)) diff --git a/egg/gen_tests.py b/egg/gen_tests.py index 4e205b21..aecf237a 100644 --- a/egg/gen_tests.py +++ b/egg/gen_tests.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 Agenium Scale +# Copyright (c) 2021 Agenium Scale # # Permission is hereby granted, free of 
charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -20,11 +20,185 @@
 # SOFTWARE.

 import os
+import math
 import sys
 import common
 import operators
 from datetime import date

+# -----------------------------------------------------------------------------
+# Helper functions
+
+def should_i_do_the_test(operator, tt='', t=''):
+    if operator.name == 'cvt' and t in common.ftypes and tt in common.iutypes:
+        # When converting from float to int to float we may not get the
+        # initial value back because of rounding. As tests usually go back
+        # and forth, both directions get tested in the end.
+        return False
+    if operator.name == 'reinterpret' and t in common.iutypes and \
+       tt in common.ftypes:
+        # When reinterpreting from int to float we may get NaNs or
+        # infinities and no one knows what this will give when going back
+        # to ints, especially when float16 is emulated. Again, as tests go
+        # back and forth, both directions get tested in the end.
+        return False
+    if operator.name in ['notb', 'andb', 'andnotb', 'xorb', 'orb'] and \
+       t == 'f16':
+        # Bit operations on float16 are hard to check because they are
+        # emulated in most cases. Going back and forth with reinterprets to
+        # do bitwise operations makes the bit in the last place wrong. This
+        # is normal but makes testing really hard. So for now we do not
+        # test them on float16.
+        return False
+    if operator.name in ['len', 'set1', 'set1l', 'mask_for_loop_tail',
+                         'loadu', 'loada', 'storeu', 'storea', 'loadla',
+                         'loadlu', 'storela', 'storelu', 'if_else1']:
+        # These functions are used in almost every test so we consider that
+        # they are extensively tested.
+        return False
+    if operator.name in ['store2a', 'store2u', 'store3a', 'store3u',
+                         'store4a', 'store4u', 'scatter', 'scatter_linear',
+                         'downcvt', 'to_logical']:
+        # These functions are tested along with their load counterparts;
+        # downcvt is tested along with upcvt and to_logical is tested with
+        # to_mask.
+        return False
+    return True
+
+# -----------------------------------------------------------------------------
+# CBPRNG (counter-based pseudorandom number generator)
+
+def cbprng_impl(typ, domain_, for_cpu, only_int=False):
+    code = '((((unsigned int)(1 + i) * 69342380u + 414585u) ' \
+           '^ ((unsigned int)(1 + j) * 89375027u + 952905u))' \
+           '% 1000000u)'
+    def c_code(a0_, a1_):
+        if a1_ < a0_:
+            raise ValueError("a0 must not be greater than a1")
+        if typ in common.utypes and a0_ < 0.0 and a1_ < 0.0:
+            raise ValueError("a0 and a1 must be positive")
+        if typ in common.ftypes:
+            a0 = a0_
+            a1 = a1_
+        else:
+            a0 = 0 if typ in common.utypes and a0_ < 0 else math.ceil(a0_)
+            a1 = math.floor(a1_)
+            if a1 < a0:
+                raise ValueError("a0 and a1 must be positive after filtering")
+
+        if typ in common.iutypes:
+            return 'return ({})({} + (f32)((i32){} % {}));'. \
+                   format(typ, a0, code, a1 - a0 + 1)
+        elif typ == 'f16':
+            return \
+            'return {}({}(((f32){} + (f32){} * (f32)({}) / 1000000.0f)));'. \
+            format('(f16)' if not for_cpu else 'nsimd_f32_to_f16',
+                   '(f32)(i32)' if only_int else '', a0, a1 - a0, code)
+        elif typ in ['f32', 'f64']:
+            return \
+            'return {}(({}){} + ({}){} * ({}){} / ({})1000000);'. \
+            format('({})({})'.format(typ, 'i' + typ[1:]) if only_int else '',
+                   typ, a0, typ, a1 - a0, typ, code, typ)
+
+    if typ not in common.utypes:
+        domain = domain_
+    else:
+        domain = []
+        for i in range(len(domain_) // 2):
+            if domain_[2 * i + 1] > 0:
+                domain.append(domain_[2 * i])
+                domain.append(domain_[2 * i + 1])
+        if len(domain) == 0:
+            raise ValueError('domain {} is empty after filtering'. \
+                             format(domain_))
+
+    nb_intervals = len(domain) // 2
+    if nb_intervals == 1:
+        return '  {}'.format(c_code(domain[0], domain[1]))
+    ret = 'int piece = ((1 + i) * (1 + j)) % {};'.format(nb_intervals)
+    for i in range(nb_intervals - 1):
+        ret += '\nif (piece == {}) {{\n'.format(i)
+        ret += '  {}\n'.format(c_code(domain[2 * i], domain[2 * i + 1]))
+        ret += '} else '
+    ret += '{\n'
+    ret += '  {}\n'.format(c_code(domain[-2], domain[-1]))
+    ret += '}'
+    return ret
+
+def cbprng(typ, operator, target, gpu_params=None):
+    if target not in ['cpu', 'cuda', 'hip', 'oneapi']:
+        raise ValueError('Unsupported target, must be cpu, cuda, hip or '
+                         'oneapi')
+
+    arity = len(operator.params[1:])
+    ret = '{}{} random_impl(int i, int j) {{\n'. \
+          format('' if target in ['cpu', 'oneapi'] else '__device__ ', typ)
+    for_cpu = (target == 'cpu')
+
+    if arity == 1:
+        ret += cbprng_impl(typ, operator.domain[0], for_cpu,
+                           operator.tests_on_integers_only)
+    else:
+        for i in range(arity - 1):
+            ret += 'if (j == {}) {{\n  {}\n}} else '. \
+                   format(i, cbprng_impl(typ, operator.domain[i], for_cpu,
+                                         operator.tests_on_integers_only))
+        ret += '{{\n{}\n}} '. \
+               format(cbprng_impl(typ, operator.domain[-1],
+                                  for_cpu, operator.tests_on_integers_only))
+    ret += '\n}\n\n'
+
+    if target == 'cpu':
+        ret += '''void random({} *dst, unsigned int n, int j) {{
+                    unsigned int i;
+                    for (i = 0; i < n; i++) {{
+                      dst[i] = random_impl((int)i, j);
+                    }}
+                  }}'''.format(typ)
+    elif target == 'cuda':
+        ret += '''__global__ void random_kernel({typ} *dst, int n, int j) {{
+                    int i = threadIdx.x + blockIdx.x * blockDim.x;
+                    if (i < n) {{
+                      dst[i] = random_impl((int)i, j);
+                    }}
+                  }}
+
+                  void random({typ} *dst, unsigned int n, int j) {{
+                    random_kernel<<<{gpu_params}>>>(dst, (int)n, j);
+                  }}'''.format(typ=typ, gpu_params=gpu_params)
+    elif target == 'hip':
+        ret += '''__global__ void random_kernel({typ} *dst, size_t n, int j) {{
+                    size_t i = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
+                    if (i < n) {{
+                      dst[i] = random_impl((int)i, j);
+                    }}
+                  }}
+
+                  void random({typ} *dst, unsigned int n, int j) {{
+                    hipLaunchKernelGGL(random_kernel, {gpu_params}, 0, 0,
+                                       dst, n, j);
+                  }}'''.format(typ=typ, gpu_params=gpu_params)
+    elif target == 'oneapi':
+        ret += '''inline void random_kernel({typ} *dst, unsigned int n, int j,
+                                            sycl::nd_item<1> item) {{
+                    size_t i = item.get_global_id().get(0);
+                    if (i < n) {{
+                      dst[i] = random_impl((int)i, j);
+                    }}
+                  }}
+
+                  void random({typ} *dst, unsigned int n, int j) {{
+                    size_t nt = (size_t)nsimd_kernel_param({n}, {tpb});
+                    sycl::queue q_ = nsimd::oneapi::default_queue();
+                    q_.parallel_for(sycl::nd_range<1>(sycl::range<1>(nt),
+                                                      sycl::range<1>({tpb})),
+                                    [=](sycl::nd_item<1> item){{
+                                      random_kernel(dst, n, j, item);
+                                    }}).wait_and_throw();
+                  }}'''.format(typ=typ, n=gpu_params[0], tpb=gpu_params[1])
+    return ret
+
+# -----------------------------------------------------------------------------
+
 posix_c_source = \
 '''#if !defined(_POSIX_C_SOURCE)
 #define _POSIX_C_SOURCE 200112L
@@ -41,22 +215,20 @@
 #pragma warning( disable : 4334 )
 #endif'''
-
 # -----------------------------------------------------------------------------
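For concreteness, here is roughly the C that the `cbprng` generator above
emits for `typ='f32'`, `target='cpu'`, arity 1 and a single domain `[0, 1)`
(the domain is an assumption picked for the example):

```c
/* Sketch of cbprng output: a counter-based PRNG, i.e. a pure function of
   the element index i and the argument index j. */
f32 random_impl(int i, int j) {
  return (f32)0 + (f32)1 *
         (f32)(((((unsigned int)(1 + i) * 69342380u + 414585u) ^
                 ((unsigned int)(1 + j) * 89375027u + 952905u)) %
                1000000u)) /
         (f32)1000000;
}

void random(f32 *dst, unsigned int n, int j) {
  unsigned int i;
  for (i = 0; i < n; i++) {
    dst[i] = random_impl((int)i, j);
  }
}
```

Because each value depends only on the indices `(i, j)`, the reference and
SIMD sides of a test consume identical inputs on every run and on every
target, with no seed state to carry around.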
 # Get filename for test

 def get_filename(opts, op, typ, lang, custom_name=''):
-    pp_lang = {'c_base': 'C (base API)',
-               'cxx_base' : 'C++ (base API)',
-               'cxx_adv' : 'C++ (advanced API)'}
     tests_dir = os.path.join(opts.tests_dir, lang)
     common.mkdir_p(tests_dir)
+    ext = {'c_base': 'prec11.c', 'c_adv': 'c'}
     if not custom_name:
         filename = os.path.join(tests_dir, '{}.{}.{}'.format(op.name, typ,
-                   'c' if lang == 'c_base' else 'cpp'))
+                   ext[lang] if lang in ['c_base', 'c_adv'] else 'cpp'))
     else:
         filename = os.path.join(tests_dir, '{}_{}.{}.{}'.format(op.name,
-                   custom_name, typ, 'c' if lang == 'c_base' else 'cpp'))
+                   custom_name, typ,
+                   ext[lang] if lang in ['c_base', 'c_adv'] else 'cpp'))
     if common.can_create_filename(opts, filename):
         return filename
     else:
@@ -69,15 +241,21 @@ def get_includes(lang):
     ret = '#include <nsimd/nsimd.h>\n'
     if lang == 'cxx_adv':
         ret += '#include <nsimd/cxx_adv_api.hpp>\n'
-    if lang == 'c_base':
+    if lang == 'c_adv':
+        ret += '#include \n'
+    if lang in ['c_base', 'c_adv']:
         ret += '''#include
                   #include
                   #include
+                  #include
+                  #include
                   #include '''
     else:
         ret += '''#include
                   #include
                   #include
+                  #include
+                  #include
                   #include '''
     return ret

@@ -87,16 +265,8 @@ def get_includes(lang):

 distance_int = '''
 int distance({typ} a, {typ} b) {{
-  {typ} d;
-  if (a > b) {{
-    d = ({typ})(a - b);
-  }} else {{
-    d = ({typ})(b - a);
-  }}
-  if ((u64)d > (u64)INT_MAX) {{
-    return INT_MAX;
-  }}
-  return (int)d;
+  {typ} d = ({typ})(a > b ? a - b : b - a);
+  return (int)((u64)d > (u64)INT_MAX ? (u64)INT_MAX : (u64)d);
 }}
 '''

@@ -118,7 +288,7 @@ def get_includes(lang):
     return -1;
   }}

-  return nsimd_diff_in_logulps_{typ}(a, b);
+  return nsimd_ufp_{typ}(a, b);
 }}

 /* ------------------------------------------------------------------------- */

@@ -138,7 +308,6 @@ def get_includes(lang):
              'f64': distance_float.format(typ='f64') }

-
 # -----------------------------------------------------------------------------
 # Template for a lot of tests

@@ -163,7 +332,7 @@ def get_includes(lang):

 {extra_code}

-int comp_function({typ} mpfr_out, {typ} nsimd_out)
+int comp_function({typ} ref_out, {typ} nsimd_out)
 {{
   {comp};
 }}

@@ -182,11 +351,9 @@ def get_includes(lang):
   fflush(stdout);

   /* Fill input vector(s) with random values */
-  for (i = 0; i < SIZE; i++) {{
-    {vin_rand}
-  }}
+  {vin_rand}

-  /* we ensure that ipnuts are normal numbers */
+  /* We ensure that inputs are normal numbers */
   for (i = 0; i < SIZE; i++) {{
     {denormalize_inputs}
   }}

@@ -225,7 +392,6 @@ def get_includes(lang):

 # -----------------------------------------------------------------------------
 # Common to most of the tests
-
 def get_content(op, typ, lang):
     cast = 'f32' if typ in ['f16', 'f32'] else 'f64'
     zero = 'nsimd_f32_to_f16(0.0f)' if typ == 'f16' else '({})0'.format(typ)
@@ -253,67 +419,18 @@ def get_content(op, typ, lang):
     code += ['CHECK(vin{} = ({}*)nsimd_aligned_alloc(SIZE * {}));'.
              format(i, typ, common.sizeof(typ)) for i in nargs]
     vin_defi = '\n'.join(code)
-    if op.name in ['rec11', 'rec8', 'rsqrt11', 'rsqrt8']:
-        if typ == 'f16':
-            code = ['vin{}[i] = nsimd_f32_to_f16((float)rand() / ' \
-                    '(float)INT_MAX);'.format(i) for i in nargs]
-        else:
-            code = ['vin{}[i] = ({})((float)rand() / (float)INT_MAX);'. \
-                    format(i, typ) for i in nargs]
-    else:
-        code = ['vin{}[i] = rand{}();'.format(i, i) for i in nargs]
-    vin_rand = '\n'.join(code)
-
-    # lgamma doesn't work for negative input or for too big inputs.
- if op.name == 'lgamma' and typ == 'f64': - vin_rand = 'vin1[i] = rand() % 64;' + vin_rand = '\n'.join(['random(vin{}, SIZE, {});'.format(i, i - 1) \ + for i in nargs]) # Make vout_ref_comp - # We use MPFR on Linux to compare numerical results, but it is only on - # Linux as MPFR does not play well on Windows. On Windows we compare - # against the cpu implementation. When using MPFR, we set one element - # at a time => cpu_step = '1' - if op.tests_mpfr and sys.platform.startswith('linux'): - cpu_step = '1' - variables = ', '.join(['a{}'.format(i) for i in nargs]) - mpfr_inits = '\n'.join(['mpfr_init2(a{}, 64);'.format(i) - for i in nargs]) - if typ == 'f16': - mpfr_set = '''mpfr_set_flt(a{i}, nsimd_u16_to_f32( - ((u16 *)vin{i})[i]), MPFR_RNDN);''' - vout_ref_set = '''((u16 *)vout_ref)[i] = nsimd_f32_to_u16( - mpfr_get_flt(c, MPFR_RNDN));''' - elif typ == 'f32': - mpfr_set = 'mpfr_set_flt(a{i}, vin{i}[i], MPFR_RNDN);' - vout_ref_set = 'vout_ref[i] = mpfr_get_flt(c, MPFR_RNDN);' - else: - mpfr_set = 'mpfr_set_d(a{i}, vin{i}[i], MPFR_RNDN);' - vout_ref_set = 'vout_ref[i] = ({})mpfr_get_d(c, MPFR_RNDN);'. \ - format(typ) - mpfr_sets = '\n'.join([mpfr_set.format(i=j) for j in nargs]) - mpfr_clears = '\n'.join(['mpfr_clear(a{});'.format(i) - for i in nargs]) - vout_ref_comp = \ - '''mpfr_t c, {variables}; - mpfr_init2(c, 64); - {mpfr_inits} - {mpfr_sets} - {mpfr_op_name}(c, {variables}, MPFR_RNDN); - {vout_ref_set} - mpfr_clear(c); - {mpfr_clears}'''. \ - format(variables=variables, mpfr_sets=mpfr_sets, - mpfr_clears=mpfr_clears, vout_ref_set=vout_ref_set, - mpfr_op_name=op.tests_mpfr_name(), mpfr_inits=mpfr_inits) - else: - args = ', '.join(['va{}'.format(i) for i in nargs]) - code = ['nsimd_cpu_v{}{} {}, vc;'.format(logical, typ, args)] - code += ['va{} = nsimd_load{}u_cpu_{}(&vin{}[i]);'. - format(i, logical, typ, i) for i in nargs] - code += ['vc = nsimd_{}_cpu_{}({});'.format(op.name, typ, args)] - code += ['nsimd_store{}u_cpu_{}(&vout_ref[i], vc);'. \ - format(logical, typ)] - vout_ref_comp = '\n'.join(code) + args = ', '.join(['va{}'.format(i) for i in nargs]) + code = ['nsimd_cpu_v{}{} {}, vc;'.format(logical, typ, args)] + code += ['va{} = nsimd_load{}u_cpu_{}(&vin{}[i]);'. + format(i, logical, typ, i) for i in nargs] + code += ['vc = nsimd_{}_cpu_{}({});'.format(op.name, typ, args)] + code += ['nsimd_store{}u_cpu_{}(&vout_ref[i], vc);'. \ + format(logical, typ)] + vout_ref_comp = '\n'.join(code) # Make vout_nsimd_comp args = ', '.join(['va{}'.format(i) for i in nargs]) @@ -324,6 +441,14 @@ def get_content(op, typ, lang): code += ['vc = v{}({}, {});'.format(op.name, args, typ)] code += ['vstore{}u(&vout_nsimd[i], vc, {});'.format(logical, typ)] vout_nsimd_comp = '\n'.join(code) + if lang == 'c_adv': + code = ['nsimd_pack{}_{} {}, vc;'.format(logical, typ, args)] + code += ['va{} = nsimd_load{}u(nsimd_pack{}_{}, &vin{}[i]);'. + format(i, logical, logical, typ, i) for i in nargs] + code += ['vc = nsimd_{}({});'.format(op.name, args)] + code += ['nsimd_store{}u(&vout_nsimd[i], vc);'. \ + format(logical, typ)] + vout_nsimd_comp = '\n'.join(code) if lang == 'cxx_base': code = ['vec{}({}) {}, vc;'.format(logical, typ, args)] code += ['va{} = nsimd::load{}u(&vin{}[i], {}());'. @@ -356,33 +481,42 @@ def get_content(op, typ, lang): CHECK(vin1 = ({typ}*)nsimd_aligned_alloc(SIZE * {sizeof})); CHECK(vin2 = ({typ}*)nsimd_aligned_alloc(SIZE * {sizeof}));'''. 
\ format(typ=typ, sizeof=common.sizeof(typ)) - code = ['vin{}[i] = rand{}();'.format(i,i) for i in nargs] + code = ['random(vin{}, SIZE, {});'.format(i, i - 1) for i in nargs] vin_rand = '\n'.join(code) vout_ref_comp = '''nsimd_cpu_v{typ} va1, va2; - nsimd_cpu_vl{typ} vc; - va1 = nsimd_loadu_cpu_{typ}(&vin1[i]); - va2 = nsimd_loadu_cpu_{typ}(&vin2[i]); - vc = nsimd_{op_name}_cpu_{typ}(va1, va2); - nsimd_storelu_cpu_{typ}(&vout_ref[i], vc);'''. \ - format(typ=typ, op_name=op.name) + nsimd_cpu_vl{typ} vc; + va1 = nsimd_loadu_cpu_{typ}(&vin1[i]); + va2 = nsimd_loadu_cpu_{typ}(&vin2[i]); + vc = nsimd_{op_name}_cpu_{typ}(va1, va2); + nsimd_storelu_cpu_{typ}(&vout_ref[i], vc);'''. \ + format(typ=typ, op_name=op.name) if lang == 'c_base': vout_nsimd_comp = '''vec({typ}) va1, va2; - vecl({typ}) vc; - va1 = vloadu(&vin1[i], {typ}); - va2 = vloadu(&vin2[i], {typ}); - vc = v{op_name}(va1, va2, {typ}); - vstorelu(&vout_nsimd[i], vc, {typ});'''. \ - format(typ=typ, op_name=op.name) + vecl({typ}) vc; + va1 = vloadu(&vin1[i], {typ}); + va2 = vloadu(&vin2[i], {typ}); + vc = v{op_name}(va1, va2, {typ}); + vstorelu(&vout_nsimd[i], vc, {typ});'''. \ + format(typ=typ, op_name=op.name) + if lang == 'c_adv': + vout_nsimd_comp = '''nsimd_pack_{typ} va1, va2; + nsimd_packl_{typ} vc; + va1 = nsimd_loadu(nsimd_pack_{typ}, &vin1[i]); + va2 = nsimd_loadu(nsimd_pack_{typ}, &vin2[i]); + vc = nsimd_{op_name}(va1, va2); + nsimd_storelu(&vout_nsimd[i], vc);'''. \ + format(typ=typ, op_name=op.name) if lang == 'cxx_base': - vout_nsimd_comp = '''vec({typ}) va1, va2; - vecl({typ}) vc; - va1 = nsimd::loadu(&vin1[i], {typ}()); - va2 = nsimd::loadu(&vin2[i], {typ}()); - vc = nsimd::{op_name}(va1, va2, {typ}()); - nsimd::storelu(&vout_nsimd[i], vc, {typ}());'''. \ - format(typ=typ, op_name=op.name) + vout_nsimd_comp = \ + '''vec({typ}) va1, va2; + vecl({typ}) vc; + va1 = nsimd::loadu(&vin1[i], {typ}()); + va2 = nsimd::loadu(&vin2[i], {typ}()); + vc = nsimd::{op_name}(va1, va2, {typ}()); + nsimd::storelu(&vout_nsimd[i], vc, {typ}());'''. \ + format(typ=typ, op_name=op.name) if lang == 'cxx_adv': if op.cxx_operator: do_computation = 'vc = va1 {} va2;'. \ @@ -390,21 +524,22 @@ def get_content(op, typ, lang): else: do_computation = 'vc = nsimd::{}(va1, va2, {}());'. \ format(op.name, typ) - vout_nsimd_comp = '''nsimd::pack<{typ}> va1, va2; - nsimd::packl<{typ}> vc; - va1 = nsimd::loadu >(&vin1[i]); - va2 = nsimd::loadu >(&vin2[i]); - {do_computation} - nsimd::storelu(&vout_nsimd[i], vc);'''. \ - format(typ=typ, op_name=op.name, - do_computation=do_computation) + vout_nsimd_comp = \ + '''nsimd::pack<{typ}> va1, va2; + nsimd::packl<{typ}> vc; + va1 = nsimd::loadu >(&vin1[i]); + va2 = nsimd::loadu >(&vin2[i]); + {do_computation} + nsimd::storelu(&vout_nsimd[i], vc);'''. \ + format(typ=typ, op_name=op.name, + do_computation=do_computation) elif op.params == ['v', 'v', 'p']: vin_defi = \ '''{typ} *vin1; CHECK(vin1 = ({typ}*)nsimd_aligned_alloc(SIZE * {sizeof}));'''. \ format(typ=typ, sizeof=common.sizeof(typ)) - vin_rand = 'vin1[i] = rand1();'.format(typ=typ) + vin_rand = 'random(vin1, SIZE, 0);' vout_ref_comp = \ '''nsimd_cpu_v{typ} va1, vc; va1 = nsimd_loadu_cpu_{typ}(&vin1[i]); @@ -418,6 +553,13 @@ def get_content(op, typ, lang): vc = v{op_name}(va1, (i / step) % {typnbytes}, {typ}); vstoreu(&vout_nsimd[i], vc, {typ});'''. 
\ format(typ=typ, op_name=op.name, typnbytes=typ[1:]) + if lang == 'c_adv': + vout_nsimd_comp = \ + '''nsimd_pack_{typ} va1, vc; + va1 = nsimd_loadu(nsimd_pack_{typ}, &vin1[i]); + vc = nsimd_{op_name}(va1, (i / step) % {typnbytes}); + nsimd_storeu(&vout_nsimd[i], vc);'''. \ + format(typ=typ, op_name=op.name, typnbytes=typ[1:]) if lang == 'cxx_base': vout_nsimd_comp = \ '''vec({typ}) va1, vc; @@ -443,137 +585,39 @@ def get_content(op, typ, lang): raise ValueError('No test available for operator "{}" on type "{}"'. format(op.name, typ)) return { 'vin_defi': vin_defi, 'vin_rand': vin_rand, 'cpu_step': cpu_step, - 'vout_ref_comp': vout_ref_comp, 'vout_nsimd_comp': vout_nsimd_comp, + 'vout_ref_comp': vout_ref_comp, + 'vout_nsimd_comp': vout_nsimd_comp, 'denormalize_inputs': denormalize_inputs } # ----------------------------------------------------------------------------- # Generate test in C, C++ (base API) and C++ (advanced API) for almost all # tests -def gen_test(opts, op, typ, lang, ulps): +def gen_test(opts, op, typ, lang): filename = get_filename(opts, op, typ, lang) if filename == None: return content = get_content(op, typ, lang) - extra_code = op.domain.gen_rand(typ) + extra_code = cbprng(typ, op, 'cpu') if op.name in ['notb', 'andb', 'orb', 'xorb', 'andnotb']: - comp = 'return nsimd_scalar_reinterpret_{uT}_{typ}(mpfr_out) != ' \ + comp = 'return nsimd_scalar_reinterpret_{uT}_{typ}(ref_out) != ' \ 'nsimd_scalar_reinterpret_{uT}_{typ}(nsimd_out)'. \ format(typ=typ, uT=common.bitfield_type[typ]) elif op.name in ['max', 'min'] and typ in common.ftypes: - comp = '''/* None of the architecture correctly manage NaN with the */ - /* function min and max. According to IEEE754, min(a, NaN) */ - /* should return a but every architecture returns NaN. */ - if (nsimd_isnan_{typ}(nsimd_out)) {{ - return 0; - }} - - /* PPC doesn't correctly manage +Inf and -Inf in relation */ - /* with NaN either (min(NaN, -Inf) returns -Inf). */ - #ifdef NSIMD_POWERPC - if (nsimd_isinf_{typ}(nsimd_out)) {{ - return 0; - }} - #endif - - return nsimd_scalar_ne_{typ}(mpfr_out, nsimd_out);'''. \ - format(typ=typ, uT=common.bitfield_type[typ]) + comp = 'return nsimd_scalar_ne_{}(ref_out, nsimd_out);'.format(typ) else: - if op.tests_ulps and typ in common.ftypes: - comp = 'return distance(mpfr_out, nsimd_out) > {}'. \ - format(op.tests_ulps[typ]) - extra_code += distance[typ] - elif op.src: - if op.name in ulps: - nbits = ulps[op.name][typ]["ulps"] - nbits_dnz = ulps[op.name][typ]["ulps for denormalized output"] - inf_error = ulps[op.name][typ]["Inf Error"] - nan_error = ulps[op.name][typ]["NaN Error"] - - if nan_error: - # Ignore error with NaN output, we know we will encounter - # some - comp += 'if (nsimd_isnan_{}(mpfr_out)) ' \ - '{{ return 0; }}\n'.format(typ) - else: - # Return false if one is NaN and not the other - comp += 'if (nsimd_isnan_{typ}(mpfr_out) ^ ' \ - 'nsimd_isnan_{typ}(nsimd_out)) ' \ - '{{ return 1; }} \n'.format(typ=typ) - - if inf_error: - # Ignore error with infinite output, we know we will - # encounter some - comp += 'if (nsimd_isinf_{}(mpfr_out)) ' \ - '{{ return 0; }}\n'.format(typ) - else: - # One is infinite and not the other - comp += 'if (nsimd_isinf_{typ}(mpfr_out) ^ ' \ - 'nsimd_isinf_{typ}(nsimd_out)) ' \ - '{{ return 1; }}\n'.format(typ=typ) - # Wrong sign for infinite - comp += 'if (nsimd_isinf_{typ}(mpfr_out) && ' \ - 'nsimd_isinf_{typ}(nsimd_out) && ' \ - '({right} * {left} < 0)) {{ return 1; }} \n'. 
\ - format(typ=typ) - - comp += \ - '''if (nsimd_isnormal_{typ}(mpfr_out)) {{ - return relative_distance((double){left}, (double){right}) - > get_2th_power(-({nbits})); - }} else {{ - return relative_distance((double){left}, (double){right}) - > get_2th_power(-({nbits_dnz})); - }}''' - - if lang == 'c_base': - comp = comp.format(left=left, right=right, nbits=nbits, - nbits_dnz=nbits_dnz) - else: - comp = comp.format(left=left, right=right, nbits=nbits, - nbits_dnz=nbits_dnz) - - else: - comp = 'return distance(mpfr_out, nsimd_out) > 1'. \ - format(left, right, nbits=nbits[typ]) - + if typ in common.ftypes: + comp = 'return distance(ref_out, nsimd_out) < {}'. \ + format(op.ufp[typ]) extra_code += distance[typ] else: - if typ in common.ftypes: - comp = \ - '''return nsimd_scalar_ne_{typ}(mpfr_out, nsimd_out) && - (!nsimd_isnan_{typ}(mpfr_out) || - !nsimd_isnan_{typ}(nsimd_out));'''. \ - format(typ=typ) - else: - comp = 'return mpfr_out != nsimd_out;' - - extra_code += '' + comp = 'return nsimd_scalar_ne_{}(ref_out, nsimd_out);'. \ + format(typ) includes = get_includes(lang) - if op.src or op.tests_ulps or op.tests_mpfr: - if lang == 'c_base': - includes = '''{} - - #include - #include - {}'''.format(posix_c_source, includes) - else: - includes = '''{} - - #include - #include - {}'''.format(posix_c_source, includes) - if op.tests_mpfr and sys.platform.startswith('linux'): - includes = includes + ''' - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wsign-conversion" - #include - #pragma GCC diagnostic pop - ''' if typ in common.ftypes: dnz_flush_to_zero = \ @@ -608,98 +652,91 @@ def gen_addv(opts, op, typ, lang): filename = get_filename(opts, op, typ, lang) if filename == None: return - if lang == 'c_base': - op_test = 'v{}(vloada(buf, {}), {})'.format(op.name, typ, typ) - elif lang == 'cxx_base': - op_test = 'nsimd::{}(nsimd::loada(buf, {}()), {}())'.format( - op.name, typ, typ) - else: - op_test = 'nsimd::{}(nsimd::loada >(buf))'.format( - op.name, typ) - - head = '''{posix_c_source} - {includes} - #include - #include - - #define CHECK(a) {{ \\ - errno = 0; \\ - if (!(a)) {{ \\ - fprintf(stderr, "ERROR: " #a ":%d: %s\\n", \\ - __LINE__, strerror(errno)); \\ - fflush(stderr); \\ - exit(EXIT_FAILURE); \\ - }} \\ - }} - - {distance} - - ''' .format(year=date.today().year, includes=get_includes(lang), - posix_c_source=posix_c_source, - distance=distance[typ]) if typ == 'f16': - # Variables initialization - init = '''f16 res = nsimd_f32_to_f16(0.0f); - f16 ref = nsimd_f32_to_f16(0.0f);''' - rand = '''nsimd_f32_to_f16((f32)(2 * (rand() % 2) - 1) * - (f32)(1 << (rand() % 4)) / - (f32)(1 << (rand() % 4)))''' - init_statement = 'buf[i] = {};'.format(rand) - ref_statement = 'ref = nsimd_scalar_add_f16(ref, buf[i]);' - test = 'if (distance(ref, res) > 1) { return EXIT_FAILURE; }' - elif typ in ['f32', 'f64']: - init = '''{typ} ref = ({typ})0; - {typ} res = ({typ})0;''' .format(typ=typ) - rand = '''({typ})(2 * (rand() % 2) - 1) * - ({typ})(1 << (rand() % 4)) / - ({typ})(1 << (rand() % 4))'''.format(typ=typ) - init_statement = 'buf[i] = {};'.format(rand) - ref_statement = 'ref += buf[i];' - test = 'if (distance(ref, res) > 1) { return EXIT_FAILURE; }' + rand = 'nsimd_f32_to_f16((f32)(rand() % 3) - 1.0f)' + zero = 'nsimd_f32_to_f16(0.0f)' + comp = 'nsimd_f16_to_f32(vout[i]) != nsimd_f16_to_f32(vref[i])' else: - init = '''{typ} ref = ({typ}) 0; - {typ} res = ({typ}) 0;'''.format(typ=typ) - rand = '({})(rand() % 4)'.format(typ) - init_statement = 'buf[i] = {rand};' .format(rand=rand) - 
ref_statement = 'ref += buf[i];' - test = 'if (ref != res) { return EXIT_FAILURE; }' + rand = '({})((int)(rand() % 3) - 1)'.format(typ) + zero = '({})0'.format(typ) + comp = 'vout[i] != vref[i]' + + if lang == 'c_base': + nsimd = 'vaddv(vloada(vin + (i * step), {typ}), {typ})'. \ + format(typ=typ) + elif lang == 'c_adv': + nsimd = 'nsimd_addv(nsimd_loada(nsimd_pack_{}, vin + (i * step)))'. \ + format(typ) + elif lang == 'cxx_base': + nsimd = 'nsimd::addv(nsimd::loada(vin + (i * step), {}()), {}())'. \ + format(typ, typ) + elif lang == 'cxx_adv': + nsimd = 'nsimd::addv(nsimd::loada >' \ + '(vin + (i * step)))'.format(typ) with common.open_utf8(opts, filename) as out: out.write( - ''' \ - {head} + '''{posix_c_source} + {includes} + + #define CHECK(a) {{ \\ + errno = 0; \\ + if (!(a)) {{ \\ + fprintf(stderr, "ERROR: " #a ":%d: %s\\n", \\ + __LINE__, strerror(errno)); \\ + fflush(stderr); \\ + exit(EXIT_FAILURE); \\ + }} \\ + }} - int main(void) {{ + #define STATUS "test of addv over {typ}" - const int len = vlen({typ}); - {typ} *buf; - int i; - {init} + int main() {{ + int step = vlen({typ}); + int size = 2048; + int i; + {typ} *vin, *vref, *vout; - fprintf(stdout, "test of {op_name} over {typ}...\\n"); - CHECK(buf = ({typ} *)nsimd_aligned_alloc(len * {sizeof})); + CHECK(vin = ({typ} *)nsimd_aligned_alloc(size * {sizeof} * step)); + CHECK(vref = ({typ} *)nsimd_aligned_alloc(size * {sizeof})); + CHECK(vout = ({typ} *)nsimd_aligned_alloc(size * {sizeof})); - for (i = 0; i < len; i++) {{ - {init_statement} - }} + fprintf(stdout, STATUS "...\\n"); + fflush(stdout); - for (i = 0; i < len; i++) {{ - {ref_statement} - }} + for (i = 0; i < step * size; i++) {{ + vin[i] = {rand}; + }} - res = {op_test}; + for (i = 0; i < size; i++) {{ + int j; + {typ} acc = {zero}; + for (j = step * i; j < step * i + step; j++) {{ + acc = nsimd_scalar_add_{typ}(acc, vin[j]); + }} + vref[i] = acc; + }} - {test} + for (i = 0; i < size; i++) {{ + vout[i] = {nsimd}; + }} - fprintf(stdout, "test of {op_name} over {typ}... OK\\n"); - return EXIT_SUCCESS; - }} - '''.format(head=head, init=init, op_name=op.name, typ=typ, - sizeof=common.sizeof(typ), - init_statement=init_statement, - ref_statement=ref_statement, op_test=op_test, test=test) - ) + for (i = 0; i < size; i++) {{ + if ({comp}) {{ + fprintf(stdout, STATUS "... FAIL\\n"); + fflush(stdout); + return -1; + }} + }} + + fprintf(stdout, STATUS "... OK\\n"); + fflush(stdout); + return 0; + }} + '''.format(typ=typ, sizeof=common.sizeof(typ), zero=zero, rand=rand, + comp=comp, nsimd=nsimd, posix_c_source=posix_c_source, + includes=get_includes(lang))) common.clang_format(opts, filename) # ----------------------------------------------------------------------------- @@ -767,29 +804,37 @@ def zero_out_arrays(typ): def compute_op_given_language(typ, op, language): if 'c_base' == language: - return ''' - vec({typ}) va1, va2, vc; - va1 = vloadu(&vin1[outer], {typ}); - va2 = vloadu(&vin2[outer], {typ}); - vc = v{op}(va1, va2, {typ}); - vstoreu(&vout_computed[outer], vc, {typ}); - '''.format(typ=typ, op=op) + return \ + '''vec({typ}) va1, va2, vc; + va1 = vloadu(&vin1[outer], {typ}); + va2 = vloadu(&vin2[outer], {typ}); + vc = v{op}(va1, va2, {typ}); + vstoreu(&vout_computed[outer], vc, {typ});'''. \ + format(typ=typ, op=op) + elif 'c_adv' == language: + return \ + '''nsimd_pack_{typ} va1, va2, vc; + va1 = nsimd_loadu(nsimd_pack_{typ}, &vin1[outer]); + va2 = nsimd_loadu(nsimd_pack_{typ}, &vin2[outer]); + vc = nsimd_{op}(va1, va2); + nsimd_storeu(&vout_computed[outer], vc);'''. 
\ + format(typ=typ, op=op) elif 'cxx_base' == language: - return ''' - vec({typ}) va1, va2, vc; - va1 = nsimd::loadu(&vin1[outer], {typ}()); - va2 = nsimd::loadu(&vin2[outer], {typ}()); - vc = nsimd::{op}(va1, va2, {typ}()); - nsimd::storeu(&vout_computed[outer], vc, {typ}()); - '''.format(typ=typ, op=op) + return \ + '''vec({typ}) va1, va2, vc; + va1 = nsimd::loadu(&vin1[outer], {typ}()); + va2 = nsimd::loadu(&vin2[outer], {typ}()); + vc = nsimd::{op}(va1, va2, {typ}()); + nsimd::storeu(&vout_computed[outer], vc, {typ}());'''. \ + format(typ=typ, op=op) else: - return ''' - nsimd::pack<{typ}> va1, va2, vc; - va1 = nsimd::loadu >(&vin1[outer]); - va2 = nsimd::loadu >(&vin2[outer]); - vc = nsimd::{op}(va1, va2); - nsimd::storeu(&vout_computed[outer], vc); - '''.format(typ=typ, op=op) + return \ + '''nsimd::pack<{typ}> va1, va2, vc; + va1 = nsimd::loadu >(&vin1[outer]); + va2 = nsimd::loadu >(&vin2[outer]); + vc = nsimd::{op}(va1, va2); + nsimd::storeu(&vout_computed[outer], vc);'''. \ + format(typ=typ, op=op) def compare_expected_vs_computed(typ, op, language): values_computation = compute_op_given_language(typ, op, language) @@ -817,7 +862,8 @@ def compare_expected_vs_computed(typ, op, language): }} '''.format(typ=typ, values_computation=values_computation) -def test_signed_neither_overflow_nor_underflow(typ, min_, max_, operator, check): +def test_signed_neither_overflow_nor_underflow(typ, min_, max_, operator, + check): return ''' int test_neither_overflow_nor_underflow({typ} vin1[], {typ} vin2[], {typ} vout_expected[], @@ -1169,7 +1215,7 @@ def get_adds_tests_cases_given_type(typ): # ----------------------------------------------------------------------------- # gen_adds -def gen_adds(opts, op, typ, lang, ulps): +def gen_adds(opts, op, typ, lang): # Do not test for floats since adds(floats) == add(floats) if typ in common.ftypes: @@ -1239,11 +1285,14 @@ def gen_adds(opts, op, typ, lang, ulps): return EXIT_SUCCESS; }} ''' .format(head=head, - compare_expected_vs_computed=compare_expected_vs_computed(typ, op.name, lang), - random_sign_flip='' if typ in common.utypes else random_sign_flip(), + compare_expected_vs_computed=\ + compare_expected_vs_computed(typ, op.name, lang), + random_sign_flip='' if typ in common.utypes \ + else random_sign_flip(), zero_out_arrays=zero_out_arrays(typ), equal=equal(typ), - tests_helpers=get_adds_tests_cases_given_type(typ)['helpers'], + tests_helpers=\ + get_adds_tests_cases_given_type(typ)['helpers'], tests=get_adds_tests_cases_given_type(typ)['tests'], op_name = op.name, typ=typ, @@ -1275,9 +1324,10 @@ def subs_signed_is_underflow(typ, min_): def subs_signed_is_neither_overflow_nor_underflow(typ): return ''' - int subs_signed_is_neither_overflow_nor_underflow(const {typ} a, const {typ} b) - {{ - return ! subs_signed_is_overflow(a, b) && ! 
subs_signed_is_underflow(a, b); + int subs_signed_is_neither_overflow_nor_underflow(const {typ} a, + const {typ} b) {{ + return !subs_signed_is_overflow(a, b) && + !subs_signed_is_underflow(a, b); }} '''.format(typ=typ) @@ -1297,9 +1347,14 @@ def subs_unsigned_is_underflow(typ): # test signed integer overflow def test_subs_signed_overflow(typ, min_, max_): return ''' - int test_overflow({typ} vin1[], {typ} vin2[], {typ} vout_expected[], {typ} vout_computed[]) + int test_overflow({typ} vin1[], {typ} vin2[], {typ} vout_expected[], + {typ} vout_computed[]) {{ - /* if ((vin2[ii] < 0) && (vin1[ii] > {max_} + vin2[ii])) {{ overflow }} */ + /* + if ((vin2[ii] < 0) && (vin1[ii] > {max_} + vin2[ii])) {{ + overflow + }} + */ int ii = 0; /* vin2[ii] < 0 */ @@ -1334,18 +1389,25 @@ def test_subs_signed_overflow(typ, min_, max_): /* Test: - if ((vin2[ii] < 0) && (vin1[ii] > {max_} + vin2[ii])) {{ vout_expected[ii] == {max_}; }} + if ((vin2[ii] < 0) && (vin1[ii] > {max_} + vin2[ii])) {{ + vout_expected[ii] == {max_}; + }} */ - return compare_expected_vs_computed(vin1, vin2, vout_expected, vout_computed); + return compare_expected_vs_computed(vin1, vin2, vout_expected, + vout_computed); }} '''.format(typ=typ, min_=min_, max_=max_) # test signed underflow def test_subs_signed_underflow(typ, min_, max_): return ''' - int test_underflow({typ} vin1[], {typ} vin2[], {typ} vout_expected[], {typ} vout_computed[]) - {{ - /* if ((vin2[ii] > 0) && (vin1[ii] < {min_} + vin2[ii])) {{ underflow }} */ + int test_underflow({typ} vin1[], {typ} vin2[], {typ} vout_expected[], + {typ} vout_computed[]) {{ + /* + if ((vin2[ii] > 0) && (vin1[ii] < {min_} + vin2[ii])) {{ + underflow + }} + */ int ii = 0; /* vin2[ii] > 0 */ @@ -1370,9 +1432,12 @@ def test_subs_signed_underflow(typ, min_, max_): /* Test: - if ((vin2[ii] > 0) && (vin1[ii] < {min_} + vin2[ii])) {{ vout_expected[ii] == {min_}; }} + if ((vin2[ii] > 0) && (vin1[ii] < {min_} + vin2[ii])) {{ + vout_expected[ii] == {min_}; + }} */ - return compare_expected_vs_computed(vin1, vin2, vout_expected, vout_computed); + return compare_expected_vs_computed(vin1, vin2, vout_expected, + vout_computed); }} '''.format(typ=typ, min_=min_, max_=max_) @@ -1384,11 +1449,13 @@ def test_subs_signed_neither_overflow_nor_underflow(typ, min_, max_): # test signed all cases def test_subs_signed_all_cases(typ, min_, max_): - return test_signed_all_cases(typ, min_, max_, '-', 'subs_signed_is_overflow', 'subs_signed_is_underflow') + return test_signed_all_cases(typ, min_, max_, '-', + 'subs_signed_is_overflow', + 'subs_signed_is_underflow') # all signed tests def tests_subs_signed(): - return''' + return ''' zero_out_arrays(vin1, vin2, vout_expected, vout_computed); CHECK_CASE(test_overflow(vin1, vin2, vout_expected, vout_computed), "overflow"); @@ -1398,8 +1465,8 @@ def tests_subs_signed(): vout_computed), "underflow"); zero_out_arrays(vin1, vin2, vout_expected, vout_computed); - CHECK_CASE(test_neither_overflow_nor_underflow(vin1, vin2, vout_expected, vout_computed), - "neither underflow nor overflow"); + CHECK_CASE(test_neither_overflow_nor_underflow(vin1, vin2, vout_expected, + vout_computed), "neither underflow nor overflow"); zero_out_arrays(vin1, vin2, vout_expected, vout_computed); CHECK_CASE(test_all_cases(vin1, vin2, vout_expected, @@ -1412,13 +1479,15 @@ def tests_subs_signed(): # test unsigned underflow def test_subs_unsigned_underflow(typ, min_, max_): return ''' - int test_underflow({typ} vin1[], {typ} vin2[], {typ} vout_expected[], {typ} vout_computed[]) - {{ + int 
test_underflow({typ} vin1[], {typ} vin2[], {typ} vout_expected[], + {typ} vout_computed[]) {{ /* if (vin1[ii] < vin2[ii]) {{ underflow }} */ int ii = 0; /* vin1[ii] */ - for(ii = 0; ii < SIZE; ++ii){{ vin1[ii] = ({typ})(({typ})rand() % {max_}); }} + for(ii = 0; ii < SIZE; ++ii) {{ + vin1[ii] = ({typ})(({typ})rand() % {max_}); + }} /* vin1[ii] < vin2[ii] @@ -1437,20 +1506,23 @@ def test_subs_unsigned_underflow(typ, min_, max_): Test: if (vin1[ii] < vin2[ii]) {{ vout_expected[ii] == {min_}; }} */ - return compare_expected_vs_computed(vin1, vin2, vout_expected, vout_computed); + return compare_expected_vs_computed(vin1, vin2, vout_expected, + vout_computed); }} '''.format(typ=typ, min_=min_, max_=max_) # test unsigned no underflow def test_subs_unsigned_no_underflow(typ, max_): return ''' - int test_no_underflow({typ} vin1[], {typ} vin2[], {typ} vout_expected[], {typ} vout_computed[]) - {{ + int test_no_underflow({typ} vin1[], {typ} vin2[], {typ} vout_expected[], + {typ} vout_computed[]) {{ /* if (vin1[ii] >= vin2[ii]) {{ no underflow }} */ int ii = 0; /* vin1[ii] */ - for(ii = 0; ii < SIZE; ++ii){{ vin1[ii] = ({typ})(({typ})rand() % {max_}); }} + for(ii = 0; ii < SIZE; ++ii) {{ + vin1[ii] = ({typ})(({typ})rand() % {max_}); + }} /* vin1[ii] >= vin2[ii] @@ -1467,17 +1539,20 @@ def test_subs_unsigned_no_underflow(typ, max_): /* Test: - if (vin1[ii] >= vin2[ii]) {{ vout_expected[ii] == vin1[ii] - vin2[ii]; }} + if (vin1[ii] >= vin2[ii]) {{ + vout_expected[ii] == vin1[ii] - vin2[ii]; + }} */ - return compare_expected_vs_computed(vin1, vin2, vout_expected, vout_computed); + return compare_expected_vs_computed(vin1, vin2, vout_expected, + vout_computed); }} '''.format(typ=typ, max_=max_) # test signed all cases def test_subs_unsigned_all_cases(typ, min_, max_): return ''' - int test_all_cases({typ} vin1[], {typ} vin2[], {typ} vout_expected[], {typ} vout_computed[]) - {{ + int test_all_cases({typ} vin1[], {typ} vin2[], {typ} vout_expected[], + {typ} vout_computed[]) {{ int ii = 0; for(ii = 0; ii < SIZE; ++ii) {{ @@ -1490,7 +1565,8 @@ def test_subs_unsigned_all_cases(typ, min_, max_): else {{ vout_expected[ii] = ({typ})(vin1[ii] - vin2[ii]); }} }} /* Test all cases: */ - return compare_expected_vs_computed(vin1, vin2, vout_expected, vout_computed); + return compare_expected_vs_computed(vin1, vin2, vout_expected, + vout_computed); }} '''.format(typ=typ, min_=min_, max_=max_) @@ -1528,15 +1604,21 @@ def get_subs_tests_cases_for_signed_types(typ, min_, max_): {test_subs_signed_neither_overflow_nor_underflow} {test_subs_signed_all_cases} - ''' .format(test_subs_signed_overflow=test_subs_signed_overflow(typ, min_, max_), - test_subs_signed_underflow=test_subs_signed_underflow(typ, min_, max_), - subs_signed_is_overflow=subs_signed_is_overflow(typ, max_), - subs_signed_is_underflow=subs_signed_is_underflow(typ, min_), - subs_signed_is_neither_overflow_nor_underflow=subs_signed_is_neither_overflow_nor_underflow(typ), - test_subs_signed_neither_overflow_nor_underflow=test_subs_signed_neither_overflow_nor_underflow( + ''' .format(test_subs_signed_overflow=\ + test_subs_signed_overflow(typ, min_, max_), + test_subs_signed_underflow=\ + test_subs_signed_underflow(typ, min_, max_), + subs_signed_is_overflow=\ + subs_signed_is_overflow(typ, max_), + subs_signed_is_underflow=\ + subs_signed_is_underflow(typ, min_), + subs_signed_is_neither_overflow_nor_underflow=\ + subs_signed_is_neither_overflow_nor_underflow(typ), + test_subs_signed_neither_overflow_nor_underflow=\ + 
test_subs_signed_neither_overflow_nor_underflow( typ, min_=min_, max_=max_), - test_subs_signed_all_cases=test_subs_signed_all_cases(typ, min_=min_, max_=max_) - ) + test_subs_signed_all_cases=\ + test_subs_signed_all_cases(typ, min_=min_, max_=max_)) return {'helpers': helpers, 'tests': tests_subs_signed()} def get_subs_tests_cases_for_unsigned_types(typ, min_, max_): @@ -1548,11 +1630,14 @@ def get_subs_tests_cases_for_unsigned_types(typ, min_, max_): {subs_unsigned_is_underflow} {test_subs_unsigned_all_cases} - ''' .format(test_subs_unsigned_underflow=test_subs_unsigned_underflow(typ, min_, max_), - test_subs_unsigned_no_underflow=test_subs_unsigned_no_underflow(typ, max_), - subs_unsigned_is_underflow=subs_unsigned_is_underflow(typ), - test_subs_unsigned_all_cases=test_subs_unsigned_all_cases(typ, min_, max_) - ) + ''' .format(test_subs_unsigned_underflow=\ + test_subs_unsigned_underflow(typ, min_, max_), + test_subs_unsigned_no_underflow=\ + test_subs_unsigned_no_underflow(typ, max_), + subs_unsigned_is_underflow=\ + subs_unsigned_is_underflow(typ), + test_subs_unsigned_all_cases=\ + test_subs_unsigned_all_cases(typ, min_, max_)) return {'helpers': helpers, 'tests': tests_subs_unsigned()} def get_subs_tests_cases_given_type(typ): @@ -1562,10 +1647,12 @@ def get_subs_tests_cases_given_type(typ): max_ = type_limits['max'] if typ in common.itypes: - return get_subs_tests_cases_for_signed_types(typ=typ, min_=min_, max_=max_) + return get_subs_tests_cases_for_signed_types( + typ=typ, min_=min_, max_=max_) if typ in common.utypes: - return get_subs_tests_cases_for_unsigned_types(typ=typ, min_=min_, max_=max_) + return get_subs_tests_cases_for_unsigned_types( + typ=typ, min_=min_, max_=max_) else: msg = '{typ} not implemented'.format(typ=typ) raise TypeError(msg) @@ -1573,7 +1660,7 @@ def get_subs_tests_cases_given_type(typ): # ----------------------------------------------------------------------------- # gen_subs -def gen_subs(opts, op, typ, lang, ulps): +def gen_subs(opts, op, typ, lang): # Do not test for floats since subs(floats) == sub(floats) if typ in common.ftypes: @@ -1586,73 +1673,70 @@ def gen_subs(opts, op, typ, lang, ulps): sizeof = common.sizeof(typ) - head = ''' - {includes} - #include + head = \ + '''{includes} + #include - #define SIZE (2048 / {sizeof}) + #define SIZE (2048 / {sizeof}) - #define STATUS "test of {op_name} over {typ}" + #define STATUS "test of {op_name} over {typ}" - {aligned_alloc_error} + {aligned_alloc_error} - {adds_subs_check_case} - ''' .format(includes=get_includes(lang), - op_name=op.name, - typ=typ, - sizeof=sizeof, - aligned_alloc_error=aligned_alloc_error(), - adds_subs_check_case=adds_subs_check_case()) + {adds_subs_check_case}'''. 
\
+           format(includes=get_includes(lang), op_name=op.name, typ=typ,
+                  sizeof=sizeof, aligned_alloc_error=aligned_alloc_error(),
+                  adds_subs_check_case=adds_subs_check_case())
     with common.open_utf8(opts, filename) as out:
-        out.write(
-            ''' \
-            {head}
-            /* ------------------------------------------------------------------------- */
+        out.write('''
+        {head}
-            {random_sign_flip}
+        {hbar}
-            {zero_out_arrays}
+        {random_sign_flip}
-            {equal}
+        {zero_out_arrays}
-            {compare_expected_vs_computed}
+        {equal}
-            {tests_helpers}
+        {compare_expected_vs_computed}
-            int main(void)
-            {{
-                const int mem_aligned_size = SIZE * {sizeof};
+        {tests_helpers}
-                {typ} *vin1;
-                {typ} *vin2;
+        int main(void)
+        {{
+            const int mem_aligned_size = SIZE * {sizeof};
-                {typ} *vout_expected;
-                {typ} *vout_computed;
+            {typ} *vin1;
+            {typ} *vin2;
-                CHECK(vin1 = ({typ} *)nsimd_aligned_alloc(mem_aligned_size));
-                CHECK(vin2 = ({typ} *)nsimd_aligned_alloc(mem_aligned_size));
+            {typ} *vout_expected;
+            {typ} *vout_computed;
-                CHECK(vout_expected = ({typ} *)nsimd_aligned_alloc(mem_aligned_size));
-                CHECK(vout_computed = ({typ} *)nsimd_aligned_alloc(mem_aligned_size));
+            CHECK(vin1 = ({typ} *)nsimd_aligned_alloc(mem_aligned_size));
+            CHECK(vin2 = ({typ} *)nsimd_aligned_alloc(mem_aligned_size));
-                {tests}
+            CHECK(vout_expected = ({typ} *)nsimd_aligned_alloc(mem_aligned_size));
+            CHECK(vout_computed = ({typ} *)nsimd_aligned_alloc(mem_aligned_size));
-                fprintf(stdout, STATUS "... OK\\n");
-                fflush(stdout);
-                return EXIT_SUCCESS;
-            }}
-            ''' .format(head=head,
-                        compare_expected_vs_computed=compare_expected_vs_computed(typ, op.name, lang),
-                        random_sign_flip='' if typ in common.utypes else random_sign_flip(),
-                        zero_out_arrays=zero_out_arrays(typ),
-                        equal=equal(typ),
-                        tests_helpers=get_subs_tests_cases_given_type(typ)['helpers'],
-                        tests=get_subs_tests_cases_given_type(typ)['tests'],
-                        op_name = op.name,
-                        typ=typ,
-                        sizeof = sizeof)
-        )
+            {tests}
+
+            fprintf(stdout, STATUS "... OK\\n");
+            fflush(stdout);
+            return EXIT_SUCCESS;
+        }}
+        '''.format(head=head,
+                   compare_expected_vs_computed=\
+                   compare_expected_vs_computed(typ, op.name, lang),
+                   random_sign_flip='' if typ in common.utypes \
+                                    else random_sign_flip(),
+                   zero_out_arrays=zero_out_arrays(typ),
+                   equal=equal(typ),
+                   tests_helpers=\
+                   get_subs_tests_cases_given_type(typ)['helpers'],
+                   tests=get_subs_tests_cases_given_type(typ)['tests'],
+                   op_name=op.name, typ=typ, hbar=common.hbar, sizeof=sizeof))
     common.clang_format(opts, filename)
@@ -1665,6 +1749,9 @@ def gen_all_any(opts, op, typ, lang):
         return
     if lang == 'c_base':
         op_test = 'v{}(vloadla(buf, {}), {})'.format(op.name, typ, typ)
+    elif lang == 'c_adv':
+        op_test = 'nsimd_{}(nsimd_loadla(nsimd_packl_{}, buf))'. \
+                  format(op.name, typ)
     elif lang == 'cxx_base':
         op_test = 'nsimd::{}(nsimd::loadla(buf, {}()), {}())'. \
                   format(op.name, typ, typ)
@@ -1750,6 +1837,12 @@ def gen_load_store(opts, op, typ, lang):
             '''vecx{deg}({typ}) v = vload{deg}{align}(&vin[i], {typ});
                vstore{deg}{align}(&vout[i], {variables}, {typ});'''. \
             format(deg=deg, typ=typ, align=align, variables=variables)
+    elif lang == 'c_adv':
+        load_store = \
+            '''nsimd_packx{deg}_{typ} v =
+                   nsimd_load{deg}{align}(nsimd_packx{deg}_{typ}, &vin[i]);
+               nsimd_store{deg}{align}(&vout[i], {variables});'''. \
+            format(deg=deg, typ=typ, align=align, variables=variables)
     elif lang == 'cxx_base':
         load_store = \
             '''vecx{deg}({typ}) v = nsimd::load{deg}{align}(&vin[i], {typ}());
@@ -1853,6 +1946,23 @@ def gen_gather_scatter(opts, op, typ, lang):
                offsets = vadd(offsets, vset1(({ityp})1, {ityp}), {ityp});
                vscatter(vout, offsets, v, {typ});'''. \
             format(typ=typ, ityp=ityp)
+    elif lang == 'c_adv':
+        if op.name == 'gather_linear':
+            gather_scatter = \
+                '''nsimd_scatter_linear(
+                       vout + 1, 2, nsimd_gather_linear(
+                           nsimd_pack_{}, vin, 2));'''.format(typ)
+        else:
+            gather_scatter = \
+                '''nsimd_pack_{ityp} offsets = nsimd_mul(nsimd_iota(
+                       nsimd_pack_{ityp}), nsimd_set1(
+                       nsimd_pack_{ityp}, ({ityp})2));
+                   nsimd_pack_{typ} v = nsimd_gather(
+                       nsimd_pack_{typ}, vin, offsets);
+                   offsets = nsimd_add(offsets, nsimd_set1(nsimd_pack_{ityp},
+                                                           ({ityp})1));
+                   nsimd_scatter(vout, offsets, v);'''. \
+                format(typ=typ, ityp=ityp)
     elif lang == 'cxx_base':
         if op.name == 'gather_linear':
             gather_scatter = '''nsimd::scatter_linear(vout + 1, 2,
@@ -1973,6 +2083,16 @@ def gen_mask_scatter(opts, op, typ, lang):
                vecl({typ}) mask = vmask_for_loop_tail(0, i, {typ});
                vmask_scatter(mask, vout, offsets, vset1({two}, {typ}),
                              {typ});'''.format(two=two, typ=typ, ityp=ityp)
+    if lang == 'c_adv':
+        mask_scatter = \
+            '''nsimd_pack_{ityp} offsets = nsimd_mul(nsimd_iota(
+                   nsimd_pack_{ityp}), nsimd_set1(
+                   nsimd_pack_{ityp}, ({ityp})2));
+               nsimd_packl_{typ} mask = nsimd_mask_for_loop_tail(
+                   nsimd_pack_{typ}, 0, i);
+               nsimd_mask_scatter(mask, vout, offsets, nsimd_set1(
+                   nsimd_pack_{typ}, {two}));'''. \
+            format(two=two, typ=typ, ityp=ityp)
     elif lang == 'cxx_base':
         mask_scatter = \
             '''vec({ityp}) offsets = nsimd::mul(nsimd::iota({ityp}()),
@@ -2093,6 +2213,18 @@ def gen_maskoz_gather(opts, op, typ, lang):
                vstoreu(vout, vmask{oz}_gather(mask, vin, offsets{ta},
                        {typ}), {typ});'''. \
             format(typ=typ, ityp=ityp, ta=ta, oz=oz)
+    if lang == 'c_adv':
+        ta = ', nsimd_set1(nsimd_pack_{typ}, {three})'. \
+             format(three=three, typ=typ) if op.name == 'masko_gather' else ''
+        maskoz_gather = \
+            '''nsimd_pack_{ityp} offsets = nsimd_mul(nsimd_iota(
+                   nsimd_pack_{ityp}), nsimd_set1(
+                   nsimd_pack_{ityp}, ({ityp})2));
+               nsimd_packl_{typ} mask = nsimd_mask_for_loop_tail(
+                   nsimd_pack_{typ}, 0, i);
+               nsimd_storeu(vout, nsimd_mask{oz}_gather(
+                   mask, vin, offsets{ta}));'''. \
+            format(typ=typ, ityp=ityp, ta=ta, oz=oz)
     elif lang == 'cxx_base':
         ta = ', nsimd::set1({three}, {typ}())'.format(three=three, typ=typ) \
             if op.name == 'masko_gather' else ''
@@ -2198,6 +2330,13 @@ def gen_mask_load(opts, op, typ, lang):
                vec({typ}) other = vset1({m1}, {typ});
                vstoreu(vout, v{op_name}(mask, vin, other, {typ}), {typ});'''. \
             format(typ=typ, op_name=op.name, m1=m1)
+    elif lang == 'c_adv':
+        test = \
+            '''nsimd_packl_{typ} mask = nsimd_mask_for_loop_tail(
+                   nsimd_packl_{typ}, 0, i);
+               nsimd_pack_{typ} other = nsimd_set1(nsimd_pack_{typ}, {m1});
+               nsimd_storeu(vout, nsimd_{op_name}(mask, vin, other));'''. \
+            format(typ=typ, op_name=op.name, m1=m1)
     elif lang == 'cxx_base':
         test = \
             '''vecl({typ}) mask = nsimd::mask_for_loop_tail(0, i, {typ}());
@@ -2221,6 +2360,12 @@ def gen_mask_load(opts, op, typ, lang):
             '''vecl({typ}) mask = vmask_for_loop_tail(0, i, {typ});
                vstoreu(vout, v{op_name}(mask, vin, {typ}), {typ});'''. \
             format(typ=typ, op_name=op.name, m1=m1)
+    elif lang == 'c_adv':
+        test = \
+            '''nsimd_packl_{typ} mask = nsimd_mask_for_loop_tail(
+                   nsimd_packl_{typ}, 0, i);
+               nsimd_storeu(vout, nsimd_{op_name}(mask, vin));'''. \
+            format(typ=typ, op_name=op.name, m1=m1)
     elif lang == 'cxx_base':
         test = \
             '''vecl({typ}) mask = nsimd::mask_for_loop_tail(0, i, {typ}());
@@ -2324,6 +2469,13 @@ def gen_mask_store(opts, op, typ, lang):
             '''vecl({typ}) mask = vmask_for_loop_tail(0, i, {typ});
                v{op_name}(mask, vout, vset1({one}, {typ}), {typ});'''. \
             format(typ=typ, op_name=op.name, one=one)
+    elif lang == 'c_adv':
+        test = \
+            '''nsimd_packl_{typ} mask = nsimd_mask_for_loop_tail(
+                   nsimd_packl_{typ}, 0, i);
+               nsimd_{op_name}(mask, vout, nsimd_set1(
+                   nsimd_pack_{typ}, {one}));'''. \
+            format(typ=typ, op_name=op.name, one=one)
     elif lang == 'cxx_base':
         test = \
             '''vecl({typ}) mask = nsimd::mask_for_loop_tail(0, i, {typ}());
@@ -2491,6 +2643,9 @@ def gen_iota(opts, op, typ, lang):
         return
     if lang == 'c_base':
         do_iota = 'vstoreu(buf, viota({typ}), {typ});'.format(typ=typ)
+    elif lang == 'c_adv':
+        do_iota = 'nsimd_storeu(buf, nsimd_iota(nsimd_pack_{typ}));'. \
+                  format(typ=typ)
     elif lang == 'cxx_base':
         do_iota = 'nsimd::storeu(buf, nsimd::iota({typ}()), {typ}());'. \
                   format(typ=typ)
@@ -2537,8 +2692,9 @@ def gen_nbtrue(opts, op, typ, lang):
     if filename == None:
         return
     if lang == 'c_base':
-        nbtrue = 'vnbtrue(vloadla(buf, {}), {})'. \
-                 format(typ, typ, typ)
+        nbtrue = 'vnbtrue(vloadla(buf, {}), {})'.format(typ, typ)
+    elif lang == 'c_adv':
+        nbtrue = 'nsimd_nbtrue(nsimd_loadla(nsimd_packl_{}, buf))'.format(typ)
     elif lang == 'cxx_base':
         nbtrue = 'nsimd::nbtrue(nsimd::loadla(buf, {}()), {}())'. \
                  format(typ, typ)
@@ -2631,8 +2787,31 @@ def gen_reinterpret_convert(opts, op, from_typ, to_typ, lang):
         comp = '''vstore{logical}a(out, v{op_name}(v{op_name}(
                       vload{logical}a(in, {from_typ}), {from_typ}, {to_typ}),
                       {to_typ}, {from_typ}), {from_typ});'''. \
-               format(op_name=op.name, from_typ=from_typ,
-                      to_typ=to_typ, logical=logical)
+               format(op_name=op.name, from_typ=from_typ,
+                      to_typ=to_typ, logical=logical)
+    elif lang == 'c_adv':
+        if op.name == 'upcvt':
+            comp = '''{{
+                          nsimd_packx2_{to_typ} tmp =
+                              nsimd_upcvt(nsimd_packx2_{to_typ},
+                                          nsimd_loada(nsimd_pack_{from_typ}, in));
+                          nsimd_storea(out, nsimd_downcvt(
+                              nsimd_pack_{from_typ}, tmp.v0, tmp.v1));
+                      }}'''.format(op_name=op.name, from_typ=from_typ,
+                                   to_typ=to_typ, logical=logical)
+        elif op.name == 'to_mask':
+            comp = '''nsimd_storela(out, nsimd_to_logical(nsimd_to_mask(
+                          nsimd_loadla(nsimd_packl_{typ}, in))));'''. \
+                   format(typ=from_typ)
+        else:
+            comp = \
+                '''nsimd_store{logical}a(out, nsimd_{op_name}(
+                       nsimd_pack{logical}_{from_typ},
+                       nsimd_{op_name}(nsimd_pack{logical}_{to_typ},
+                           nsimd_load{logical}a(nsimd_pack{logical}_{from_typ},
+                                                in))));'''. \
+                format(op_name=op.name, from_typ=from_typ,
+                       to_typ=to_typ, logical=logical)
    elif lang == 'cxx_base':
         if op.name == 'upcvt':
             comp = '''vecx2({to_typ}) tmp =
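As a quick illustration of the c_adv 'upcvt' round trip above (hand-expanded for a hypothetical from_typ = 'i8', to_typ = 'i16' pair chosen only for readability; the nsimd_* names come verbatim from the template):

    # Python sketch, not part of the patch: the C fragment the c_adv
    # 'upcvt' template produces once formatted for i8 -> i16.
    comp = '''{
        nsimd_packx2_i16 tmp =
            nsimd_upcvt(nsimd_packx2_i16,
                        nsimd_loada(nsimd_pack_i8, in));
        nsimd_storea(out, nsimd_downcvt(
            nsimd_pack_i8, tmp.v0, tmp.v1));
    }'''
    print(comp)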
@@ -2756,6 +2935,9 @@ def gen_reverse(opts, op, typ, lang):
         test_code = \
             'vstorea(out, vreverse(vloada(in, {typ}), {typ}), {typ});'. \
             format(typ=typ)
+    elif lang == 'c_adv':
+        test_code = '''nsimd_storea(out, nsimd_reverse(nsimd_loada(
+                           nsimd_pack_{typ}, in)));'''.format(typ=typ)
     elif lang == 'cxx_base':
         test_code = \
             'nsimd::storea(out, nsimd::reverse(nsimd::loada(in, {typ}()), ' \
@@ -2840,39 +3022,47 @@ def gen_unpack_half(opts, op, typ, lang):
     if filename == None:
         return
     if typ == 'f16':
-        left = '(double)nsimd_f16_to_f32(mpfr_out)'
+        left = '(double)nsimd_f16_to_f32(ref_out)'
         right = '(double)nsimd_f16_to_f32(nsimd_out)'
     elif typ == 'f32':
-        left = '(double)mpfr_out'
+        left = '(double)ref_out'
         right = '(double)nsimd_out'
     else:
-        left = 'mpfr_out'
+        left = 'ref_out'
         right = 'nsimd_out'
     if lang == 'c_base':
         typ_nsimd = 'vec({typ})'.format(typ=typ)
         vout1_comp = '''vec({typ}) va1, va2, vc;
-                        va1 = vloadu(&vin1[i], {typ});
-                        va2 = vloadu(&vin2[i], {typ});
-                        vc = v{op_name}(va1, va2, {typ});
-                        vstoreu(&vout[i], vc, {typ});'''. \
-                     format(typ=typ, op_name=op.name)
+                     va1 = vloadu(&vin1[i], {typ});
+                     va2 = vloadu(&vin2[i], {typ});
+                     vc = v{op_name}(va1, va2, {typ});
+                     vstoreu(&vout[i], vc, {typ});'''. \
+                     format(typ=typ, op_name=op.name)
+    if lang == 'c_adv':
+        typ_nsimd = 'nsimd_pack_{typ}'.format(typ=typ)
+        vout1_comp = '''nsimd_pack_{typ} va1, va2, vc;
+                     va1 = nsimd_loadu(nsimd_pack_{typ}, &vin1[i]);
+                     va2 = nsimd_loadu(nsimd_pack_{typ}, &vin2[i]);
+                     vc = nsimd_{op_name}(va1, va2);
+                     nsimd_storeu(&vout[i], vc);'''. \
+                     format(typ=typ, op_name=op.name)
     if lang == 'cxx_base':
         typ_nsimd = 'vec({typ})'.format(typ=typ)
         vout1_comp = '''vec({typ}) va1, va2, vc;
-                        va1 = nsimd::loadu(&vin1[i], {typ}());
-                        va2 = nsimd::loadu(&vin2[i], {typ}());
-                        vc = nsimd::{op_name}(va1, va2, {typ}());
-                        nsimd::storeu(&vout[i], vc, {typ}());'''. \
-                     format(typ=typ, op_name=op.name)
+                     va1 = nsimd::loadu(&vin1[i], {typ}());
+                     va2 = nsimd::loadu(&vin2[i], {typ}());
+                     vc = nsimd::{op_name}(va1, va2, {typ}());
+                     nsimd::storeu(&vout[i], vc, {typ}());'''. \
+                     format(typ=typ, op_name=op.name)
     if lang == 'cxx_adv':
         typ_nsimd = 'nsimd::pack<{typ}>'.format(typ=typ)
         vout1_comp = '''nsimd::pack<{typ}> va1, va2, vc;
-                        va1 = nsimd::loadu<nsimd::pack<{typ}> >(&vin1[i]);
-                        va2 = nsimd::loadu<nsimd::pack<{typ}> >(&vin2[i]);
-                        vc = nsimd::{op_name}(va1, va2);
-                        nsimd::storeu(&vout[i], vc);'''. \
-                     format(typ=typ, op_name=op.name)
+                     va1 = nsimd::loadu<nsimd::pack<{typ}> >(&vin1[i]);
+                     va2 = nsimd::loadu<nsimd::pack<{typ}> >(&vin2[i]);
+                     vc = nsimd::{op_name}(va1, va2);
+                     nsimd::storeu(&vout[i], vc);'''. \
+                     format(typ=typ, op_name=op.name)
     op_test = 'step/(2 * nb_lane)'
     if op.name in['ziphi', 'ziplo']:
@@ -3007,45 +3197,59 @@ def gen_unpack(opts, op, typ, lang):
     if filename == None:
         return
     if typ == 'f16':
-        left = '(double)nsimd_f16_to_f32(mpfr_out)'
+        left = '(double)nsimd_f16_to_f32(ref_out)'
         right = '(double)nsimd_f16_to_f32(nsimd_out)'
     elif typ == 'f32':
-        left = '(double)mpfr_out'
+        left = '(double)ref_out'
         right = '(double)nsimd_out'
     else:
-        left = 'mpfr_out'
+        left = 'ref_out'
         right = 'nsimd_out'
     if lang == 'c_base':
         typ_nsimd = 'vec({typ})'.format(typ=typ)
-        vout1_comp = '''vec({typ}) va1, va2;
-                        vecx2({typ}) vc;
-                        va1 = vloadu(&vin1[i], {typ});
-                        va2 = vloadu(&vin2[i], {typ});
-                        vc = v{op_name}(va1, va2, {typ});
-                        vstoreu(&vout[2 * i], vc.v0, {typ});
-                        vstoreu(&vout[2 * i + vlen({typ})], vc.v1, {typ});'''. \
-                     format(typ=typ, op_name=op.name)
+        vout1_comp = \
+            '''vec({typ}) va1, va2;
+               vecx2({typ}) vc;
+               va1 = vloadu(&vin1[i], {typ});
+               va2 = vloadu(&vin2[i], {typ});
+               vc = v{op_name}(va1, va2, {typ});
+               vstoreu(&vout[2 * i], vc.v0, {typ});
+               vstoreu(&vout[2 * i + vlen({typ})], vc.v1, {typ});'''. \
+            format(typ=typ, op_name=op.name)
+    if lang == 'c_adv':
+        typ_nsimd = 'nsimd_pack_{typ}'.format(typ=typ)
+        vout1_comp = \
+            '''nsimd_pack_{typ} va1, va2;
+               nsimd_packx2_{typ} vc;
+               va1 = nsimd_loadu(nsimd_pack_{typ}, &vin1[i]);
+               va2 = nsimd_loadu(nsimd_pack_{typ}, &vin2[i]);
+               vc = nsimd_{op_name}(va1, va2);
+               nsimd_storeu(&vout[2 * i], vc.v0);
+               nsimd_storeu(&vout[2 * i + nsimd_len(nsimd_pack_{typ})],
+                            vc.v1);'''.format(typ=typ, op_name=op.name)
     if lang == 'cxx_base':
         typ_nsimd = 'vec({typ})'.format(typ=typ)
-        vout1_comp = '''vec({typ}) va1, va2;
-                        vecx2({typ}) vc;
-                        va1 = nsimd::loadu(&vin1[i], {typ}());
-                        va2 = nsimd::loadu(&vin2[i], {typ}());
-                        vc = nsimd::{op_name}(va1, va2, {typ}());
-                        nsimd::storeu(&vout[2 * i], vc.v0, {typ}());
-                        nsimd::storeu(&vout[2 * i + vlen({typ})], vc.v1, {typ}());'''. \
-                     format(typ=typ, op_name=op.name)
+        vout1_comp = \
+            '''vec({typ}) va1, va2;
+               vecx2({typ}) vc;
+               va1 = nsimd::loadu(&vin1[i], {typ}());
+               va2 = nsimd::loadu(&vin2[i], {typ}());
+               vc = nsimd::{op_name}(va1, va2, {typ}());
+               nsimd::storeu(&vout[2 * i], vc.v0, {typ}());
+               nsimd::storeu(&vout[2 * i + vlen({typ})], vc.v1, {typ}());'''. \
+            format(typ=typ, op_name=op.name)
     if lang == 'cxx_adv':
         typ_nsimd = 'nsimd::pack<{typ}>'.format(typ=typ)
-        vout1_comp = '''nsimd::pack<{typ}> va1, va2;
-                        nsimd::packx2<{typ}> vc;
-                        va1 = nsimd::loadu<nsimd::pack<{typ}> >(&vin1[i]);
-                        va2 = nsimd::loadu<nsimd::pack<{typ}> >(&vin2[i]);
-                        vc = nsimd::{op_name}(va1, va2);
-                        nsimd::storeu(&vout[2 * i], vc.v0);
-                        nsimd::storeu(&vout[2 * i + nsimd::len({typ}())], vc.v1);'''. \
-                     format(typ=typ, op_name=op.name)
+        vout1_comp = \
+            '''nsimd::pack<{typ}> va1, va2;
+               nsimd::packx2<{typ}> vc;
+               va1 = nsimd::loadu<nsimd::pack<{typ}> >(&vin1[i]);
+               va2 = nsimd::loadu<nsimd::pack<{typ}> >(&vin2[i]);
+               vc = nsimd::{op_name}(va1, va2);
+               nsimd::storeu(&vout[2 * i], vc.v0);
+               nsimd::storeu(&vout[2 * i + nsimd::len({typ}())], vc.v1);'''. \
+            format(typ=typ, op_name=op.name)
     head = '''{posix_c_source}
@@ -3168,53 +3372,54 @@ def gen_unpack(opts, op, typ, lang):
 # Entry point
 
 def doit(opts):
-    ulps = common.load_ulps_informations(opts)
     common.myprint(opts, 'Generating tests')
     for op_name, operator in operators.operators.items():
         # Skip non-matching tests
         if opts.match and not opts.match.match(op_name):
             continue
-        if op_name in ['if_else1', 'loadu', 'loada', 'storeu', 'storea',
-                       'len', 'loadlu', 'loadla', 'storelu', 'storela',
-                       'set1', 'store2a', 'store2u', 'store3a', 'store3u',
-                       'store4a', 'store4u', 'downcvt', 'to_logical',
-                       'mask_for_loop_tail', 'set1l', 'scatter',
-                       'scatter_linear']:
-            continue
         for typ in operator.types:
-            if operator.name in ['notb', 'andb', 'xorb', 'orb', 'andnotb'] and \
-               typ == 'f16':
+            if not should_i_do_the_test(operator, '', typ):
                 continue
            elif operator.name == 'nbtrue':
                 gen_nbtrue(opts, operator, typ, 'c_base')
+                gen_nbtrue(opts, operator, typ, 'c_adv')
                 gen_nbtrue(opts, operator, typ, 'cxx_base')
                 gen_nbtrue(opts, operator, typ, 'cxx_adv')
             elif operator.name == 'addv':
                 if typ in common.ftypes:
                     gen_addv(opts, operator, typ, 'c_base')
+                    gen_addv(opts, operator, typ, 'c_adv')
                     gen_addv(opts, operator, typ, 'cxx_base')
                     gen_addv(opts, operator, typ, 'cxx_adv')
             elif operator.name == 'adds':
-                gen_adds(opts, operator, typ, 'c_base', ulps)
-                gen_adds(opts, operator, typ, 'cxx_base', ulps)
-                gen_adds(opts, operator, typ, 'cxx_adv', ulps)
+                gen_adds(opts, operator, typ, 'c_base')
+                gen_adds(opts, operator, typ, 'c_adv')
+                gen_adds(opts, operator, typ, 'cxx_base')
+                gen_adds(opts, operator, typ, 'cxx_adv')
             elif operator.name == 'subs':
-                gen_subs(opts, operator, typ, 'c_base', ulps)
-                gen_subs(opts, operator, typ, 'cxx_base', ulps)
-                gen_subs(opts, operator, typ, 'cxx_adv', ulps)
+                gen_subs(opts, operator, typ, 'c_base')
+                gen_subs(opts, operator, typ, 'c_adv')
+                gen_subs(opts, operator, typ, 'cxx_base')
+                gen_subs(opts, operator, typ, 'cxx_adv')
             elif operator.name in ['all', 'any']:
                 gen_all_any(opts, operator, typ, 'c_base')
+                gen_all_any(opts, operator, typ, 'c_adv')
                 gen_all_any(opts, operator, typ, 'cxx_base')
                 gen_all_any(opts, operator, typ, 'cxx_adv')
             elif operator.name == 'iota':
                 gen_iota(opts, operator, typ, 'c_base')
+                gen_iota(opts, operator, typ, 'c_adv')
                 gen_iota(opts, operator, typ, 'cxx_base')
                 gen_iota(opts, operator, typ, 'cxx_adv')
             elif operator.name in ['reinterpret', 'reinterpretl', 'cvt',
                                    'upcvt', 'to_mask']:
                 for to_typ in common.get_output_types(typ, operator.output_to):
+                    if not should_i_do_the_test(operator, to_typ, typ):
+                        continue
                     gen_reinterpret_convert(opts, operator, typ, to_typ,
                                             'c_base')
+                    gen_reinterpret_convert(opts, operator, typ, to_typ,
+                                            'c_adv')
                    gen_reinterpret_convert(opts, operator, typ, to_typ,
                                             'cxx_base')
                     gen_reinterpret_convert(opts, operator, typ, to_typ,
@@ -3222,44 +3427,54 @@ def doit(opts):
             elif operator.name in ['load2a', 'load2u', 'load3a', 'load3u',
                                    'load4a', 'load4u']:
                 gen_load_store(opts, operator, typ, 'c_base')
+                gen_load_store(opts, operator, typ, 'c_adv')
                 gen_load_store(opts, operator, typ, 'cxx_base')
                 gen_load_store(opts, operator, typ, 'cxx_adv')
                 gen_load_store_ravel(opts, operator, typ, 'c_base')
             elif operator.name in ['gather', 'gather_linear']:
                 gen_gather_scatter(opts, operator, typ, 'c_base')
+                gen_gather_scatter(opts, operator, typ, 'c_adv')
                 gen_gather_scatter(opts, operator, typ, 'cxx_base')
                 gen_gather_scatter(opts, operator, typ, 'cxx_adv')
             elif operator.name == 'mask_scatter':
                 gen_mask_scatter(opts, operator, typ, 'c_base')
+                gen_mask_scatter(opts, operator, typ, 'c_adv')
                 gen_mask_scatter(opts, operator, typ, 'cxx_base')
                 gen_mask_scatter(opts, operator, typ, 'cxx_adv')
             elif operator.name in ['maskz_gather', 'masko_gather']:
                 gen_maskoz_gather(opts, operator, typ, 'c_base')
+                gen_maskoz_gather(opts, operator, typ, 'c_adv')
                 gen_maskoz_gather(opts, operator, typ, 'cxx_base')
                 gen_maskoz_gather(opts, operator, typ, 'cxx_adv')
             elif operator.name in ['masko_loada1', 'masko_loadu1',
                                    'maskz_loada1', 'maskz_loadu1']:
                 gen_mask_load(opts, operator, typ, 'c_base')
+                gen_mask_load(opts, operator, typ, 'c_adv')
                 gen_mask_load(opts, operator, typ, 'cxx_base')
                 gen_mask_load(opts, operator, typ, 'cxx_adv')
             elif operator.name in ['mask_storea1', 'mask_storeu1']:
                 gen_mask_store(opts, operator, typ, 'c_base')
+                gen_mask_store(opts, operator, typ, 'c_adv')
                 gen_mask_store(opts, operator, typ, 'cxx_base')
                 gen_mask_store(opts, operator, typ, 'cxx_adv')
             elif operator.name == 'reverse':
                 gen_reverse(opts, operator, typ, 'c_base');
+                gen_reverse(opts, operator, typ, 'c_adv');
                 gen_reverse(opts, operator, typ, 'cxx_base');
                 gen_reverse(opts, operator, typ, 'cxx_adv');
             elif operator.name in ['ziplo', 'ziphi', 'unziplo', 'unziphi']:
                 gen_unpack_half(opts, operator, typ, 'c_base')
+                gen_unpack_half(opts, operator, typ, 'c_adv')
                 gen_unpack_half(opts, operator, typ, 'cxx_base')
                 gen_unpack_half(opts, operator, typ, 'cxx_adv')
             elif operator.name in ['zip', 'unzip']:
                 gen_unpack(opts, operator, typ, 'c_base')
+                gen_unpack(opts, operator, typ, 'c_adv')
                 gen_unpack(opts, operator, typ, 'cxx_base')
                 gen_unpack(opts, operator, typ, 'cxx_adv')
             else:
-                gen_test(opts, operator, typ, 'c_base', ulps)
-                gen_test(opts, operator, typ, 'cxx_base', ulps)
-                gen_test(opts, operator, typ, 'cxx_adv', ulps)
+                gen_test(opts, operator, typ, 'c_base')
+                gen_test(opts, operator, typ, 'c_adv')
+                gen_test(opts, operator, typ, 'cxx_base')
+                gen_test(opts, operator, typ, 'cxx_adv')
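All the 'c_adv' calls registered above target the new advanced C API. As a minimal sketch of what one of these templates expands to — using the gen_iota template verbatim, with typ = 'f32' chosen purely for illustration:

    # Python sketch, not part of the patch: the C statement that lands
    # in the generated c_adv test for iota on f32.
    do_iota = 'nsimd_storeu(buf, nsimd_iota(nsimd_pack_{typ}));'.format(typ='f32')
    print(do_iota)  # -> nsimd_storeu(buf, nsimd_iota(nsimd_pack_f32));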
diff --git a/egg/gen_ulps.py b/egg/gen_ulps.py
deleted file mode 100644
index c0fc97e6..00000000
--- a/egg/gen_ulps.py
+++ /dev/null
@@ -1,319 +0,0 @@
-# Copyright (c) 2019 Agenium Scale
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-# -----------------------------------------------------------------------------
-# Import section
-
-import gen_tests
-import common
-import operators
-import os
-
-# -----------------------------------------------------------------------------
-# Includes
-
-includes = \
-'''
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-'''
-
-# -----------------------------------------------------------------------------
-# Random numbers generators
-
-random_f16_generator = \
-'''
-u16 acc = 0;
-for (i = 0; i < SIZE; i++) {
-  memcpy(&vin1[i], &acc, sizeof(u16));
-  ++acc;
-}
-'''
-
-random_f32_generator = \
-'''
-u32 acc = 0;
-for (i = 0; i < SIZE; i++) {
-  memcpy(&vin1[i], &acc, sizeof(u32));
-  acc+=(u32)((rand()%(16*16))+1);
-}
-'''
-
-random_f64_generator = \
-'''
-for (i = 0; i < SIZE/2; ++i) {
-  double num = 0.;
-
-  u64 *inum = reinterpret_cast<u64 *>(&num);
-  for (u64 j=0; j<64; ++j) {
-    u64 tmp = ((u64)(rand())%2u) << j;
-    *inum = *inum | tmp;
-  }
-
-  vin1[i] = num;
-}
-
-for (; i va1, vc;
-    va1 = nsimd::loadu<nsimd::pack<{typ}> >(&vin1[i]);
-    vc = nsimd::{nsimd_func}(va1);
-    nsimd::storeu(&vout1[i], vc);
-  }}
-
-  int ulp = -{mantisse};
-  double worst_rel = 0.;
-  int worst_value_index = 0;
-
-  int ulp_dnz = -{mantisse};
-  double worst_rel_dnz = 0.;
-  i64 worst_value_dnz_index = 0;
-
-  int inf_error = false;
-  i64 inf_error_index = 0;
-
-  int nan_error = false;
-  i64 nan_error_index = 0;
-
-  /* Compare results */
-  for (i = 0; i < SIZE; ++i) {{
-    double rel = relative_distance((double){convert_from_type}(vout0[i]),
-                                   (double){convert_from_type}(vout1[i]));
-
-    u64 hex_in = 0;
-    memcpy(&hex_in, &vin1[i], sizeof(u32));
-
-    {typ} mpfr_out = vout0[i];
-    {typ} nsimd_out = vout1[i];
-
-    if (std::fpclassify({convert_from_type}(mpfr_out)) == FP_SUBNORMAL) {{
-      // Result should be a subnormal float
-      if (std::fpclassify({convert_from_type}(nsimd_out)) == FP_SUBNORMAL) {{
-        if (rel > worst_rel_dnz) {{
-          worst_rel_dnz = rel;
-          worst_value_dnz_index = i;
-          ulp_dnz = (int) log2(rel);
-        }}
-      }} else if (std::fpclassify({convert_from_type}(nsimd_out)) == FP_ZERO) {{
-        worst_rel_dnz = DBL_MAX;
-        worst_value_dnz_index = i;
-        ulp_dnz = 1;
-      }}
-    }}
-    else if (rel < 0) {{
-      #ifdef DEBUG
-      printf("IN: %e 0x%lx\\t", (double){convert_from_type}(vin1[i]), hex_in);
-      printf("OUT: %e %e\\n", (double){convert_from_type}(vout0[i]),
-             (double){convert_from_type}(vout1[i]));
-      #endif
-
-      if (std::fpclassify({convert_from_type}(mpfr_out)) == FP_NAN) {{
-        nan_error = true;
-        nan_error_index = i;
-      }} else {{
-        inf_error = true;
-        inf_error_index = i;
-      }}
-
-      worst_rel = DBL_MAX;
-    }} else if (rel > worst_rel) {{
-      #ifdef DEBUG
-      printf("IN: %e 0x%lx\\t", (double){convert_from_type}(vin1[i]), hex_in);
-      printf("OUT: %e %e\\n", (double){convert_from_type}(vout0[i]),
-             (double){convert_from_type}(vout1[i]));
-      #endif
-      ulp = (int) log2(rel);
-      worst_rel = rel;
-      worst_value_index = i;
-    }}
-  }}
-
-  ulp = std::min(-ulp, {mantisse});
-  ulp_dnz = std::min(-ulp_dnz, {mantisse});
-
-  u64 worst_value = 0, nan_value, inf_value, worst_value_dnz;
-  memcpy(&worst_value, &vin1[worst_value_index], sizeof({typ}));
-  memcpy(&nan_value, &vin1[nan_error_index], sizeof({typ}));
-  memcpy(&inf_value, &vin1[inf_error_index], sizeof({typ}));
-  memcpy(&worst_value_dnz, &vin1[worst_value_dnz_index], sizeof({typ}));
-
-  fprintf(stdout, "{{\\n\\t"
-                  "\\"func\\":\\"{nsimd_func}\\", "
-                  "\\"type\\":\\"{typ}\\",\\n\\t"
-                  "\\"ulps\\" : \\"%d\\", "
-                  "\\"Worst value\\": \\"0x%lx\\",\\n\\t"
-                  "\\"ulps for denormalized output\\" : \\"%d\\", "
-                  "\\"Worst value for dnz output\\" : \\"0x%lx\\",\\n\\t"
-                  "\\"NaN Error\\":\\"%s\\", "
-                  "\\"Value causing NaN\\":\\"0x%lx\\",\\n\\t"
-                  "\\"Inf Error\\":\\"%s\\", "
-                  "\\"Value causing Inf error\\":\\"0x%lx\\"\\n"
-                  "}}",
-          ulp,
-          worst_value,
-          ulp_dnz,
-          worst_value_dnz,
-          nan_error?"true":"false",
-          nan_value,
-          inf_error?"true":"false",
-          inf_value);
-  fflush(stdout);
-
-  free(vin1);
-  free(vout0);
-  free(vout1);
-
-  return 0;
-}}
-'''
-
-# -----------------------------------------------------------------------------
-# Entry point
-
-# TODO: redo a second pass after swaping numbers around
-# (to avoid vector filled with similar numbers)
-
-def doit(opts):
-    common.myprint(opts, 'Generating ulps')
-    common.mkdir_p(opts.ulps_dir)
-    for op_name, operator in operators.operators.items():
-        if not operator.tests_mpfr:
-            continue
-        if op_name in ['gammaln', 'lgamma', 'pow']:
-            continue
-
-        mpfr_func = operator.tests_mpfr_name()
-        mpfr_rnd = ", MPFR_RNDN"
-
-        for typ in common.ftypes:
-            if typ == 'f16':
-                random_generator = random_f16_generator
-                convert_to_type = "nsimd_f32_to_f16"
-                convert_from_type = "nsimd_f16_to_f32"
-                mantisse=10
-                size = 0xffff
-                mpfr_suffix = "flt"
-            elif typ == 'f32':
-                convert_to_type = "(f32)"
-                convert_from_type = ""
-                random_generator = random_f32_generator
-                mantisse=23
-                #size = 0xffffffff
-                size = 0x00ffffff
-                mpfr_suffix = "flt"
-            elif typ == 'f64':
-                convert_to_type = "(f64)"
-                convert_from_type = ""
-                random_generator = random_f64_generator
-                mantisse = 52
-                size = 0x00ffffff
-                mpfr_suffix = "d"
-            else:
-                raise Exception('Unsupported type "{}"'.format(typ))
-
-            filename = os.path.join(opts.ulps_dir, '{}_{}_{}.cpp'. \
-                       format(op_name, "ulp", typ));
-
-            if not common.can_create_filename(opts, filename):
-                continue
-
-            with common.open_utf8(opts, filename) as out:
-                out.write(includes)
-                out.write(gen_tests.relative_distance_cpp)
-                out.write(code.format(
-                    typ = typ,
-                    nsimd_func = op_name,
-                    mpfr_func = mpfr_func,
-                    mpfr_rnd = mpfr_rnd,
-                    random_generator = random_generator,
-                    convert_from_type = convert_from_type,
-                    convert_to_type = convert_to_type,
-                    mantisse = mantisse,
-                    SIZE=size,
-                    mpfr_suffix=mpfr_suffix))
-
-            common.clang_format(opts, filename)
diff --git a/egg/get_sleef_code.py b/egg/get_sleef_code.py
new file mode 100644
index 00000000..be4df555
--- /dev/null
+++ b/egg/get_sleef_code.py
@@ -0,0 +1,247 @@
+# Copyright (c) 2021 Agenium Scale
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import common
+import shutil
+import requests
+import zipfile
+import os
+
+# -----------------------------------------------------------------------------
+
+def doit(opts):
+    common.myprint(opts, 'Copy native Sleef version {}'. \
+                   format(opts.sleef_version))
+
+    # First download Sleef
+    sleef_dir = os.path.join(opts.script_dir, '..', '_deps-sleef')
+    common.mkdir_p(sleef_dir)
+    url = 'https://github.com/shibatch/sleef/archive/refs/tags/{}.zip'. \
+          format(opts.sleef_version)
+    r = requests.get(url, allow_redirects=True)
+    sleef_zip = os.path.join(sleef_dir, 'sleef.zip')
+    with open(sleef_zip, 'wb') as fout:
+        fout.write(r.content)
+
+    # Unzip sleef
+    with zipfile.ZipFile(sleef_zip, 'r') as fin:
+        fin.extractall(path=sleef_dir)
+
+    # Copy helper function
+    def copy(filename):
+        dst_filename = os.path.basename(filename)
+        shutil.copyfile(os.path.join(sleef_dir,
+                                     'sleef-{}'.format(opts.sleef_version),
+                                     filename),
+                        os.path.join(opts.src_dir, dst_filename))
+
+    # Copy files
+    copy('src/libm/sleefsimddp.c')
+    copy('src/libm/sleefsimdsp.c')
+    copy('src/libm/sleefdp.c')
+    copy('src/libm/sleefsp.c')
+    copy('src/common/misc.h')
+    copy('src/libm/estrin.h')
+    copy('src/libm/dd.h')
+    copy('src/libm/df.h')
+    copy('src/libm/rempitab.c')
+    copy('src/arch/helpersse2.h')
+    copy('src/arch/helperavx.h')
+    copy('src/arch/helperavx2.h')
+    copy('src/arch/helperavx512f.h')
+    copy('src/arch/helperneon32.h')
+    copy('src/arch/helperadvsimd.h')
+    copy('src/arch/helperpower_128.h')
+    copy('src/arch/helpersve.h')
+
+    # Sleef uses aliases but we don't need those so we comment them out
+    def comment_DALIAS_lines(filename):
+        src = os.path.join(opts.src_dir, filename)
+        dst = os.path.join(opts.src_dir, 'tmp.c')
+        with open(src, 'r') as fin, open(dst, 'w') as fout:
+            for line in fin:
+                if line.startswith('DALIAS_'):
+                    fout.write('/* {} */\n'.format(line.strip()))
+                else:
+                    fout.write(line)
+        shutil.copyfile(dst, src)
+        os.remove(dst)
+    comment_DALIAS_lines('sleefsimdsp.c')
+    comment_DALIAS_lines('sleefsimddp.c')
+
+    # Sleef provides runtime SIMD detection via cpuid but we don't need it
+    def replace_x86_cpuid(filename):
+        src = os.path.join(opts.src_dir, filename)
+        dst = os.path.join(opts.src_dir, 'tmp.c')
+        with open(src, 'r') as fin, open(dst, 'w') as fout:
+            for line in fin:
+                if line.startswith('void Sleef_x86CpuID'):
+                    fout.write(
+                    '''static inline
+                    void Sleef_x86CpuID(int32_t out[4], uint32_t eax,
+                                        uint32_t ecx) {
+                      /* We don't care for cpuid detection */
+                      out[0] = 0xFFFFFFFF;
+                      out[1] = 0xFFFFFFFF;
+                      out[2] = 0xFFFFFFFF;
+                      out[3] = 0xFFFFFFFF;
+                    }
+                    ''')
+                else:
+                    fout.write(line)
+        shutil.copyfile(dst, src)
+        os.remove(dst)
+    replace_x86_cpuid('helpersse2.h')
+    replace_x86_cpuid('helperavx.h')
+    replace_x86_cpuid('helperavx2.h')
+    replace_x86_cpuid('helperavx512f.h')
+
+    # Sleef uses force inline through its INLINE macro defined in misc.h
+    # We modify it to avoid warnings and because force inline has been a pain
+    # in the past. We also rename some exported symbols.
+    with open(os.path.join(opts.src_dir, 'misc.h'), 'a') as fout:
+        fout.write(
+        '''
+
+        /* NSIMD specific */
+        #ifndef NSIMD_SLEEF_MISC_H
+        #define NSIMD_SLEEF_MISC_H
+
+        #ifdef INLINE
+        #undef INLINE
+        #endif
+        #define INLINE inline
+
+        #define Sleef_rempitabdp nsimd_sleef_rempitab_f64
+        #define Sleef_rempitabsp nsimd_sleef_rempitab_f32
+
+        #endif
+
+        ''')
+
+    # Sleef functions must be renamed properly for each SIMD extension.
+    # Moreover their name must contain their precision (in ULPs). This
+    # precision is not the same for all functions and some functions can have
+    # different flavours (or precisions). The "database" is contained within
+    # src/libm/funcproto.h. So we parse it and produce names
+    # in headers "rename[SIMD ext].h" to avoid modifying Sleef C files.
+    funcproto = os.path.join(sleef_dir, 'sleef-{}'.format(opts.sleef_version),
+                             'src', 'libm', 'funcproto.h')
+    defines = []
+    ulp_suffix = {
+        '0' : '',
+        '1' : '_u1',
+        '2' : '_u05',
+        '3' : '_u35',
+        '4' : '_u15',
+        '5' : '_u3500'
+    }
+    with open(funcproto, 'r') as fin:
+        for line in fin:
+            if (line.find('{') != -1 and line.find('}') != -1):
+                items = [item.strip() \
+                         for item in line.strip(' \n\r{},').split(',')]
+                items[0] = items[0].strip('"')
+                if items[0] == 'NULL':
+                    break
+                sleef_name_f64 = items[0] + ulp_suffix[items[2]]
+                sleef_name_f32 = items[0] + 'f' + ulp_suffix[items[2]]
+                items[1] = items[1] if items[1] != '5' else '05'
+                if items[1] == '-1':
+                    nsimd_name_f64 = 'nsimd_sleef_{}_{{nsimd_ext}}_f64'. \
+                                     format(items[0])
+                    nsimd_name_f32 = 'nsimd_sleef_{}_{{nsimd_ext}}_f32'. \
+                                     format(items[0])
+                else:
+                    nsimd_name_f64 = \
+                        'nsimd_sleef_{}_u{}{{det}}_{{nsimd_ext}}_f64'. \
+                        format(items[0], items[1])
+                    nsimd_name_f32 = \
+                        'nsimd_sleef_{}_u{}{{det}}_{{nsimd_ext}}_f32'. \
+                        format(items[0], items[1])
+                defines.append('#define x{} {}'.format(sleef_name_f64,
+                                                       nsimd_name_f64))
+                defines.append('#define x{} {}'.format(sleef_name_f32,
+                                                       nsimd_name_f32))
+    defines = '\n'.join(defines)
+
+    sleef_to_nsimd = {
+        '': ['scalar'],
+        'sse2': ['sse2'],
+        'sse4': ['sse42'],
+        'avx': ['avx'],
+        'avx2': ['avx2'],
+        'avx512f': ['avx512_knl', 'avx512_skylake'],
+        'neon32': ['neon128'],
+        'advsimd': ['aarch64'],
+        'sve': ['sve128', 'sve256', 'sve512', 'sve1024', 'sve2048'],
+        'vsx': ['vmx', 'vsx']
+    }
+
+    for simd_ext in ['', 'sse2', 'sse4', 'avx', 'avx2', 'avx512f', 'neon32',
+                     'advsimd', 'sve', 'vsx']:
+        renameheader = os.path.join(opts.src_dir,
+                                    'rename{}.h'.format(simd_ext))
+        se = simd_ext if simd_ext != '' else 'scalar'
+        with open(renameheader, 'w') as fout:
+            fout.write(
+            '''#ifndef RENAME{SIMD_EXT}_H
+            #define RENAME{SIMD_EXT}_H
+
+            '''.format(SIMD_EXT=se.upper()))
+            for nse in sleef_to_nsimd[simd_ext]:
+                ifdef = '' if simd_ext == '' \
+                        else '#ifdef NSIMD_{}'.format(nse.upper())
+                endif = '' if simd_ext == '' else '#endif'
+                fout.write(
+                '''{hbar}
+                /* Naming of functions {nsimd_ext} */
+
+                {ifdef}
+
+                #ifdef DETERMINISTIC
+
+                {defines_det_f32}
+
+                #else
+
+                {defines_nondet_f32}
+
+                #endif
+
+                #define rempi nsimd_sleef_rempi_{nsimd_ext}
+                #define rempif nsimd_sleef_rempif_{nsimd_ext}
+                #define rempisub nsimd_sleef_rempisub_{nsimd_ext}
+                #define rempisubf nsimd_sleef_rempisubf_{nsimd_ext}
+                #define gammak nsimd_gammak_{nsimd_ext}
+                #define gammafk nsimd_gammafk_{nsimd_ext}
+
+                {endif}
+
+                '''.format(NSIMD_EXT=nse.upper(), nsimd_ext=nse,
+                           hbar=common.hbar, ifdef=ifdef, endif=endif,
+                           defines_det_f32=defines.format(det='d', nsimd_ext=nse),
+                           defines_nondet_f32=defines.format(det='', nsimd_ext=nse),
+                           defines_det_f64=defines.format(det='d', nsimd_ext=nse),
+                           defines_nondet_f64=defines.format(det='', nsimd_ext=nse)))
+
+        fout.write('\n\n#endif\n\n')
+
+        common.clang_format(opts, renameheader)
diff --git a/egg/hatch.py b/egg/hatch.py
index 558c29b8..e5e5db24 100644
--- a/egg/hatch.py
+++ b/egg/hatch.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019 Agenium Scale
+# Copyright (c) 2021 Agenium Scale
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -26,54 +26,6 @@
 # `gen_*.py` file. This script simply calls the `doit` function of each
 # `gen_*.py` module. Names are self-explanatory.
 #
-# The list of supported architectures is determined by looking at the `egg`
-# directory and listing all `platform_*.py` files. Each file must contain all
-# SIMD extensions for a given architecture. For example the default (no SIMD) is
-# given by `platform_cpu.py`. All the Intel SIMD extensions are given by
-# `platform_x86.py`.
-#
-# Each module that implements a platform:
-# - must be named 'platform_[name for platform].py
-# - must export at least the following functions
-#
-# * def get_type(simd_ext, typ)
-#   Returns the "intrinsic" SIMD type corresponding to the given
-#   arithmetic type. If typ or simd_ext is not known then a ValueError
-#   exception must be raised.
-#
-# * def get_additional_include(func, simd_ext, typ)
-#   Returns additional include if need be for the implementation of func for
-#   the given simd_ext and typ.
-#
-# * def get_logical_type(simd_ext, typ)
-#   Returns the "intrinsic" logical SIMD type corresponding to the given
-#   arithmetic type. If typ or simd_ext is not known then a ValueError
-#   exception must be raised.
-#
-# * def get_nb_registers(simd_ext)
-#   Returns the number of registers for this SIMD extension.
-#
-# * def get_impl(func, simd_ext, from_typ, to_typ)
-#   Returns the implementation (C code) for func on type typ for simd_ext.
-#   If typ or simd_ext is not known then a ValueError exception must be
-#   raised. Any func given satisfies `S func(T a0, T a1, ... T an)`.
-#
-# * def has_compatible_SoA_types(simd_ext)
-#   Returns True iff the given simd_ext has structure of arrays types
-#   compatible with NSIMD i.e. whose members are v1, v2, ... Returns False
-#   otherwise. If simd_ext is not known then a ValueError exception must be
-#   raised.
-#
-# * def get_SoA_type(simd_ext, typ, deg)
-#   Returns the structure of arrays types for the given typ, simd_ext and
-#   deg. If simd_ext is not known or does not name a type whose
-#   corresponding SoA types are compatible with NSIMD then a ValueError
-#   exception must be raised.
-#
-# * def emulate_fp16(simd_ext)
-#   Returns True iff the given SIMD extension has to emulate FP16's with
-#   two FP32's.
-
 # -----------------------------------------------------------------------------
 # First thing we do is check whether python3 is used
 
@@ -91,15 +43,15 @@
 import common
 import gen_archis
 import gen_base_apis
-import gen_advanced_api
+import gen_adv_cxx_api
+import gen_adv_c_api
 import gen_tests
-import gen_benches
 import gen_src
 import gen_doc
 import gen_friendly_but_not_optimized
-import gen_ulps
 import gen_modules
 import gen_scalar_utilities
+import get_sleef_code
 
 # Dir of this script
 script_dir = os.path.dirname(__file__)
@@ -131,7 +83,7 @@ def parse_match(value):
         return None
     else:
         return re.compile(value)
-    # In pratice, we either generate all or all except benches and we never
+    # In practice, we either generate all or all except tests and we never
     # change default directories for code generation. So we remove unused
     # options and regroup some into --library.
     parser = argparse.ArgumentParser(
@@ -142,15 +94,13 @@ def parse_match(value):
                         default=False,
                         help='List files that will be created by hatch.py')
     parser.add_argument('--all', '-A', action='store_true',
-                        help='Generate code for the library and its benches')
+                        help='Generate code for the library and its tests')
     parser.add_argument('--library', '-l', action='store_true',
                         help='Generate code of the library (C and C++ APIs)')
-    parser.add_argument('--ulps', '-u', action='store_true',
-                        help='Generate code to compute precision on big functions')
+    parser.add_argument('--sleef', '-s', action='store_true', default=False,
+                        help='Compile Sleef')
     parser.add_argument('--tests', '-t', action='store_true',
                         help='Generate tests in C and C++')
-    parser.add_argument('--benches', '-b', action='store_true',
-                        help='Generate benches in C and C++')
     parser.add_argument('--doc', '-d', action='store_true',
                         help='Generate all documentation')
     parser.add_argument('--enable-clang-format', '-F', action='store_false',
@@ -173,20 +123,19 @@ def parse_match(value):
     if opts.list_files:
         opts.library = True
         opts.tests = True
-        opts.benches = True
         opts.force = True
         opts.doc = True
     # We set variables here because all the code depends on them + we do want
     # to keep the possibility to change them in the future
     opts.archis = opts.library
     opts.base_apis = opts.library
-    opts.cxx_api = opts.library
+    opts.adv_cxx_api = opts.library
+    opts.adv_c_api = opts.library
     opts.friendly_but_not_optimized = opts.library
     opts.src = opts.library
     opts.scalar_utilities = opts.library
-    opts.ulps_dir = os.path.join(script_dir, '..', 'ulps')
+    opts.sleef_version = '3.5.1'
     opts.include_dir = os.path.join(script_dir, '..', 'include', 'nsimd')
-    opts.benches_dir = os.path.join(script_dir, '..', 'benches')
     opts.tests_dir = os.path.join(script_dir, '..', 'tests')
     opts.src_dir = os.path.join(script_dir, '..', 'src')
     return opts
@@ -207,16 +156,16 @@ def main():
         gen_archis.doit(opts)
     if opts.base_apis == True or opts.all == True:
         gen_base_apis.doit(opts)
-    if opts.cxx_api == True or opts.all == True:
-        gen_advanced_api.doit(opts)
-    if opts.ulps == True or opts.all == True:
-        gen_ulps.doit(opts)
+    if opts.adv_cxx_api == True or opts.all == True:
+        gen_adv_cxx_api.doit(opts)
+    if opts.adv_c_api == True or opts.all == True:
+        gen_adv_c_api.doit(opts)
     if opts.tests == True or opts.all == True:
         gen_tests.doit(opts)
-    if opts.benches == True or opts.all == True:
-        gen_benches.doit(opts)
     if opts.src == True or opts.all == True:
         gen_src.doit(opts)
+    if opts.sleef == True or opts.all == True:
+        get_sleef_code.doit(opts)
     if opts.scalar_utilities == True or opts.all == True:
         gen_scalar_utilities.doit(opts)
     if opts.friendly_but_not_optimized == True or opts.all == True:
diff --git a/egg/modules/fixed_point/gen_doc.py b/egg/modules/fixed_point/gen_doc.py
index c2065027..a7251a33 100644
--- a/egg/modules/fixed_point/gen_doc.py
+++ b/egg/modules/fixed_point/gen_doc.py
@@ -33,17 +33,6 @@
 import common
 import operators
 
-## List of the NSIMD operators currently suppported by the module
-# op_list = [
-#     'len', 'set1', 'loadu', 'loada', 'loadlu', 'loadla', 'storeu', 'storea',
-#     'add', 'sub', 'mul', 'div', 'fma', 'min', 'max', 'eq', 'ne', 'le', 'lt',
-#     'ge', 'gt', 'if_else1', 'andb', 'andnotb', 'notb', 'orb', 'xorb', 'andl',
-#     'andnotl', 'orl']
-
-from modules.fixed_point.operators import *
-
-operators = fp_operators
-
 # ------------------------------------------------------------------------------
 
 def gen_overview(opts):
@@ -158,91 +147,76 @@ def gen_overview(opts):
 # -----------------------------------------------------------------------------
 
-def get_type(param):
-    if param == 'V':
-        return 'void '
-    elif param == 'T':
-        return 'T '
-    elif param == 's': # Scalar parameter
-        return 'typename T::value_type '
-    elif param == 'cs': # Const scalar parameter
-        return 'const typename T::value_type '
-    elif param == 'cs&': # Const scalar parameter
-        return 'const typename T::value_type &'
-    elif param == 'cT&':
-        return 'const T &'
-    elif param == 's*': # Pointer to a scalar
+def get_type(param, return_typ=False):
+    if param == '_':
+        return 'void'
+    elif param == '*':
         return 'typename T::value_type *'
-    elif param == 'v': # Vector type
-        return 'pack '
-    elif param == 'v&': # Vector type ref
-        return 'pack &'
-    elif param == 'cv': # Const vector type
-        return 'const pack '
-    elif param == 'cv&': # Const vector type reference
-        return 'const pack &'
-    elif param == 'vl': # Vector of logical type
-        return 'packl '
-    elif param == 'vl&': # Vector of logical type reference
-        return 'packl &'
-    elif param == 'cvl': # Const vector of logical type
-        return 'const packl '
-    elif param == 'cvl&': # Const vector of logical type reference
-        return 'const packl &'
+    elif param == 'c*':
+        return 'const typename T::value_type *'
+    elif param == 's':
+        return 'typename T::value_type'
+    elif param in 'v':
+        return 'pack' if return_typ else 'const pack &'
+    elif param == 'l':
+        return 'packl' if return_typ else 'const packl &'
     elif param == 'p':
         return 'int '
     else:
-        return ''
+        return None
 
 # -----------------------------------------------------------------------------
 
 def gen_decl(op):
-    ret = ''
-    op_sign = op.cxx_operator
-    for signature in op.signatures:
-        signature = signature.split(' ')
-        params = signature[2:]
-        args = ', '.join('{}{}'.format(get_type(params[i]),'a{}'.format(i)) \
-                         for i in range(0, len(params)))
-        decl_base = decl_template.format(ret=get_type(signature[0]),
-                                         op=signature[1], args=args)
-        decl_op = ''
-        if op_sign != '':
-            decl_op = decl_template.format(ret=get_type(signature[0]),
-                                           op='operator{}'.format(op_sign),
-                                           args=args)
-        ret += decl_base + decl_op
-
+    sig = '{}{} {{}}({});'.format(
+              'template <typename T> ' \
+              if 'v' not in op.params[1:] and \
+                 'l' not in op.params[1:] else '',
+              get_type(op.params[0], True),
+              ', '.join(['{} {}'.format(
+                             get_type(op.params[i + 1]),
+                             common.get_arg(i)) \
+                         for i in range(len(op.params[1:]))])
+          )
     ret = 'namespace nsimd {\n' \
-          +'namespace fixed_point {\n' \
-          + ret \
-          + '} // namespace fixed_point\n' \
-          + '} // namespace nsimd'
+          'namespace fixed_point {\n\n' + sig.format(op.name) + '\n\n'
+    if op.cxx_operator != None:
+        ret += sig.format('operator' + op.cxx_operator) + '\n\n'
+    ret += '} // namespace fixed_point\n' \
+           '} // namespace nsimd'
     return ret
 
 # -----------------------------------------------------------------------------
 
-def gen_api(opts):
+def gen_api(opts, op_list):
+    api = dict()
+    for _, operator in operators.operators.items():
+        if operator.name not in op_list:
+            continue
+        for c in operator.categories:
+            if c not in api:
+                api[c] = [operator]
+            else:
+                api[c].append(operator)
+
     filename = common.get_markdown_file(opts, 'api', 'fixed_point')
     with common.open_utf8(opts, filename) as fout:
         fout.write('''# NSIMD fixed point API\n''')
-        for cat in fp_categories:
-            ops = [op for op in fp_operators if cat in op.categories]
-            if(len(ops) == 0):
+        for c, ops in api.items():
+            if len(ops) == 0:
                 continue
-
-            fout.write('\n## {}\n\n'.format(cat))
-
+            fout.write('\n## {}\n\n'.format(c.title))
             for op in ops:
-                fout.write(
-                    '- [{} ({})](module_fixed_point_api_{}.md)\n'\
-                    .format(op.full_name, op.name,
-                            common.to_filename(op.name)))
+                fout.write('- [{} ({})](module_fixed_point_api_{}.md)\n'. \
+                           format(op.full_name, op.name,
+                                  common.to_filename(op.name)))
 
 # -----------------------------------------------------------------------------
 
-def gen_doc(opts):
-    for op in operators:
+def gen_doc(opts, op_list):
+    for _, op in operators.operators.items():
+        if op.name not in op_list:
+            continue
         filename = common.get_markdown_api_file(opts, op.name, 'fixed_point')
         with common.open_utf8(opts, filename) as fout:
             fout.write(api_template.format(full_name=op.full_name,
@@ -250,9 +224,9 @@ def gen_doc(opts):
 
 # -----------------------------------------------------------------------------
 
-def doit(opts):
+def doit(opts, op_list):
     common.myprint(opts, 'Generating doc for module fixed_point')
     gen_overview(opts)
-    gen_api(opts)
-    gen_doc(opts)
+    gen_api(opts, op_list)
+    gen_doc(opts, op_list)
diff --git a/egg/modules/fixed_point/gen_tests.py b/egg/modules/fixed_point/gen_tests.py
index 45350bcd..b51394b0 100644
--- a/egg/modules/fixed_point/gen_tests.py
+++ b/egg/modules/fixed_point/gen_tests.py
@@ -20,7 +20,6 @@
 
 import os
 import sys
-#sys.path.append("..")
 import common
 
 # -------------------------------------------------------------------------------
diff --git a/egg/modules/fixed_point/hatch.py b/egg/modules/fixed_point/hatch.py
index 9d64c65c..0437b0eb 100644
--- a/egg/modules/fixed_point/hatch.py
+++ b/egg/modules/fixed_point/hatch.py
@@ -18,6 +18,47 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+## -----------------------------------------------------------------------------
+
+op_list = [
+    'len',
+    'set1',
+    'loadu',
+    'loada',
+    'loadlu',
+    'loadla',
+    'storeu',
+    'storea',
+    'storelu',
+    'storela',
+    'add',
+    'sub',
+    'mul',
+    'div',
+    'fma',
+    'min',
+    'max',
+    'abs',
+    'rec',
+    'eq',
+    'ne',
+    'le',
+    'lt',
+    'ge',
+    'gt',
+    'ifelse1',
+    'andb',
+    'andnotb',
+    'notb',
+    'orb',
+    'xorb',
+    'andl',
+    'andnotl',
+    'notl',
+    'orl',
+    'xorl'
+]
+
 # -----------------------------------------------------------------------------
 # Imports
 
@@ -45,4 +86,4 @@ def doit(opts):
     if opts.tests == True or opts.all == True:
         modules.fixed_point.gen_tests.doit(opts)
     if opts.doc == True or opts.all == True:
-        modules.fixed_point.gen_doc.doit(opts)
+        modules.fixed_point.gen_doc.doit(opts, op_list)
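A rough illustration of what the rewritten gen_decl in gen_doc.py above produces, assuming a hypothetical 'add' operator with params ['v', 'v', 'v'], cxx_operator '+', and common.get_arg(i) returning 'a0', 'a1', ... (all three are assumptions, since neither the operator table nor common.get_arg appears in this patch):

    # 'v' occurs in params[1:], so no 'template <typename T> ' prefix is added.
    sig = '{}{} {{}}({});'.format(
              '', 'pack',
              ', '.join('const pack & a{}'.format(i) for i in range(2)))
    print(sig.format('add'))        # pack add(const pack & a0, const pack & a1);
    print(sig.format('operator+'))  # pack operator+(const pack & a0, const pack & a1);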
diff --git a/egg/modules/fixed_point/operators.py b/egg/modules/fixed_point/operators.py
deleted file mode 100644
index cef30f93..00000000
--- a/egg/modules/fixed_point/operators.py
+++ /dev/null
@@ -1,490 +0,0 @@
-# Use utf-8 encoding
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2019 Agenium Scale
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import os
-import sys
-sys.path.append("..")
-from common import *
-
-## -----------------------------------------------------------------------------
-
-fp_operators = []
-
-DocMisc = 'Miscellaneous operators'
-DocLoadStore = 'Loads and stores'
-DocBasicArithmetic = 'Basic arithmetic operators'
-DocComparison = 'Comparison operators'
-DocLogicalOperators = 'Logical operators'
-DocBitsOperators = 'Bits manipulation operators'
-
-fp_categories = sorted([
-    DocMisc,
-    DocLoadStore,
-    DocBasicArithmetic,
-    DocComparison,
-    DocLogicalOperators,
-    DocBitsOperators])
-
-class FpOperator(object):
-    name = ''
-    full_name = ''
-    domain = Domain('R')
-    desc = ''
-    signatures = ''
-    categories = []
-    cxx_operator = ''
-
-## -----------------------------------------------------------------------------
-# Len
-
-Len = FpOperator()
-Len.name = 'len'
-Len.full_name = 'Vector length'
-Len.signatures = ['p len cT&', 'p len cv&']
-Len.domain = Domain('')
-Len.categories = [DocMisc]
-Len.desc = 'Returns the number of elements contained in a vector.'
-fp_operators.append(Len)
-
-## -----------------------------------------------------------------------------
-# Set1
-
-Set1 = FpOperator()
-Set1.name = 'set1'
-Set1.full_name = 'Value broadcast'
-Set1.signatures = ['T set1 s']
-Set1.categories = [DocMisc]
-Set1.desc = 'Returns a vector whose all elements are set to the given value.'
-fp_operators.append(Set1)
-
-## -----------------------------------------------------------------------------
-# Loadu
-
-Loadu = FpOperator()
-Loadu.name = 'loadu'
-Loadu.full_name = 'Vector unaligned load'
-Loadu.signatures = ['v loadu s*']
-Loadu.categories = [DocLoadStore]
-Loadu.desc = 'Load data from unaligned memory.'
-fp_operators.append(Loadu)
-
-## -----------------------------------------------------------------------------
-# Loada
-
-Loada = FpOperator()
-Loada.name = 'loada'
-Loada.full_name = 'Vector aligned load'
-Loada.signatures = ['v loada s*']
-Loada.categories = [DocLoadStore]
-Loada.desc = 'Load data from aligned memory.'
-fp_operators.append(Loada)
-
-## -----------------------------------------------------------------------------
-# Loadlu
-
-Loadlu = FpOperator()
-Loadlu.name = 'loadlu'
-Loadlu.full_name = 'Logical vector unaligned load'
-Loadlu.signatures = ['vl loadlu s*']
-Loadlu.categories = [DocLoadStore]
-Loadlu.desc = 'Load logical data from unaligned memory.'
-fp_operators.append(Loadlu)
-
-## -----------------------------------------------------------------------------
-# Loadla
-
-Loadla = FpOperator()
-Loadla.name = 'loadla'
-Loadla.full_name = 'Logical vector aligned load'
-Loadla.signatures = ['vl loadla s*']
-Loadla.categories = [DocLoadStore]
-Loadla.desc = 'Load logical data from aligned memory.'
-fp_operators.append(Loadla)
-
-## -----------------------------------------------------------------------------
-# Storeu
-
-Storeu = FpOperator()
-Storeu.name = 'storeu'
-Storeu.full_name = 'Vector unaligned store'
-Storeu.signatures = ['V storeu s* T']
-Storeu.categories = [DocLoadStore]
-Storeu.desc = 'Store a vector in unaligned memory.'
-fp_operators.append(Storeu)
-
-## -----------------------------------------------------------------------------
-# Storea
-
-Storea = FpOperator()
-Storea.name = 'storea'
-Storea.full_name = 'Vector aligned store'
-Storea.signatures = ['V storea s* T']
-Storea.categories = [DocLoadStore]
-Storea.desc = 'Store a vector in aligned memory.'
-fp_operators.append(Storea)
-
-## -----------------------------------------------------------------------------
-# Storelu
-
-Storelu = FpOperator()
-Storelu.name = 'storelu'
-Storelu.full_name = 'Logical vector unaligned store'
-Storelu.signatures = ['V storelu s* T']
-Storelu.categories = [DocLoadStore]
-Storelu.desc = 'Store a logical vector in an unaligned memory.'
-fp_operators.append(Storelu)
-
-## -----------------------------------------------------------------------------
-# Storela
-
-Storela = FpOperator()
-Storela.name = 'storela'
-Storela.full_name = 'Logical vector aligned store'
-Storela.signatures = ['V storela s* T']
-Storela.categories = [DocLoadStore]
-Storela.desc = 'Store a logical vector in an aligned memory.'
-fp_operators.append(Storela)
-
-## -----------------------------------------------------------------------------
-# Add
-
-Add = FpOperator()
-Add.name = 'add'
-Add.full_name = 'Addition of two vectors'
-Add.signatures = ['v add cv& cv&']
-Add.categories = [DocBasicArithmetic]
-Add.desc = 'Adds two vectors.'
-Add.cxx_operator = '+'
-fp_operators.append(Add)
-
-## -----------------------------------------------------------------------------
-# Sub
-
-Sub = FpOperator()
-Sub.name = 'sub'
-Sub.full_name = 'Substraction of two vectors'
-Sub.signatures = ['v sub cv& cv&']
-Sub.categories = [DocBasicArithmetic]
-Sub.cxx_operator = '-'
-Sub.desc = 'Substracts two vectors.'
-fp_operators.append(Sub)
-
-## -----------------------------------------------------------------------------
-# Mul
-
-Mul = FpOperator()
-Mul.name = 'mul'
-Mul.full_name = 'Multplication of two vectors'
-Mul.signatures = ['v mul cv& cv&']
-Mul.categories = [DocBasicArithmetic]
-Mul.cxx_operator = '*'
-Mul.desc = 'Multiplies two vectors.'
-fp_operators.append(Mul)
-
-## -----------------------------------------------------------------------------
-# Div
-
-Div = FpOperator()
-Div.name = 'div'
-Div.full_name = 'Division of two vectors'
-Div.signatures = ['v div cv& cv&']
-Div.categories = [DocBasicArithmetic]
-Div.cxx_operator = '/'
-Div.desc = 'Divides two vectors.'
-fp_operators.append(Div)
-
-## -----------------------------------------------------------------------------
-# Fma
-
-Fma = FpOperator()
-Fma.name = 'fma'
-Fma.full_name = 'Fused multiplication and accumulation emulation'
-Fma.signatures = ['v fma cv& cv& cv&']
-Fma.categories = [DocBasicArithmetic]
-Fma.desc = 'Emulates the FMA operation with fixed-point arithmetic ' \
-    + 'for compatibility.\n' \
-    + 'This function is just a wrapper that calls consecutively an add then\n' \
-    + 'a mul operation.'
-fp_operators.append(Fma)
-
-## -----------------------------------------------------------------------------
-# Min
-
-Min = FpOperator()
-Min.name = 'min'
-Min.full_name = 'Minimum value'
-Min.signatures = ['v min cv& cv&']
-Min.categories = [DocBasicArithmetic]
-Min.desc = 'Returns a vector with the min values of the input vectors.'
-fp_operators.append(Min)
-
-## -----------------------------------------------------------------------------
-# Max
-
-Max = FpOperator()
-Max.name = 'max'
-Max.full_name = 'Maximum value'
-Max.signatures = ['v max cv& cv&']
-Max.categories = [DocBasicArithmetic]
-Max.desc = 'Returns a vector with the max values of the input vectors.'
-fp_operators.append(Max)
-
-## -----------------------------------------------------------------------------
-# Abs
-
-Abs = FpOperator()
-Abs.name = 'abs'
-Abs.full_name = 'Absolute value'
-Abs.signatures = ['v abs cv&']
-Abs.categories = [DocBasicArithmetic]
-Abs.desc = 'Absolute value of a fixed-point vector'
-fp_operators.append(Abs)
-
-## -----------------------------------------------------------------------------
-# Rec
-
-Rec = FpOperator()
-Rec.name = 'rec'
-Rec.full_name = 'Reciprocal'
-Rec.signatures = ['v rec cv&']
-Rec.categories = [DocBasicArithmetic]
-Rec.desc = 'Reciprocal value of a fixed-point SIMD register.'
-fp_operators.append(Rec)
-
-## -----------------------------------------------------------------------------
-# Eq
-
-Eq = FpOperator()
-Eq.name = 'eq'
-Eq.full_name = 'Compare for equality'
-Eq.signatures = ['vl eq cv& cv&']
-Eq.categories = [DocComparison]
-Eq.cxx_operator = '=='
-Eq.desc = '''\
-Peforms an equality test between two fixed-point registers, and returns
-the results of the test in a logical register.
-'''
-fp_operators.append(Eq)
-
-## -----------------------------------------------------------------------------
-# Ne
-
-Ne = FpOperator()
-Ne.name = 'ne'
-Ne.full_name = 'Compare for inequality'
-Ne.signatures = ['vl ne cv& cv&']
-Ne.categories = [DocComparison]
-Ne.cxx_operator = '!='
-Ne.desc = '''\
-Performs an inequality test between two fixed-point registers, and returns
-the results on the test in a logical register.
-'''
-fp_operators.append(Ne)
-
-## -----------------------------------------------------------------------------
-# Le
-
-Le = FpOperator()
-Le.name = 'le'
-Le.full_name = 'Compare for lesser-or-equal-than'
-Le.signatures = ['vl le cv& cv&']
-Le.categories = [DocComparison]
-Le.cxx_operator = '<='
-Le.desc = '''\
-Performs a lesser-or-equal comparison between two fixed-point registers, and returns
-the results of the test in a logical vector.
-'''
-fp_operators.append(Le)
-
-## -----------------------------------------------------------------------------
-# Lt
-
-Lt = FpOperator()
-Lt.name = 'lt'
-Lt.full_name = 'Compare for lesser-than'
-Lt.signatures = ['vl lt cv& cv&']
-Lt.categories = [DocComparison]
-Lt.cxx_operator = '<'
-Lt.desc = '''\
-Performs a lesser-than comparison between two fixed-point registers, and returns
-the results of the test in a logical vector.
-'''
-fp_operators.append(Lt)
-
-## -----------------------------------------------------------------------------
-# e
-
-Ge = FpOperator()
-Ge.name = 'ge'
-Ge.full_name = 'Compare for greater-or-equal-than'
-Ge.signatures = ['vl ge cv& cv&']
-Ge.categories = [DocComparison]
-Ge.cxx_operator = '>='
-Ge.desc = '''\
-Performs a greater-or-equal-than comparison between two fixed-point registers, and returns
-the results of the test in a logical vector.
-'''
-fp_operators.append(Ge)
-
-## -----------------------------------------------------------------------------
-# Gt
-
-Gt = FpOperator()
-Gt.name = 'gt'
-Gt.full_name = 'Compare for greater-than'
-Gt.signatures = ['vl gt cv& cv&']
-Gt.categories = [DocComparison]
-Gt.cxx_operator = '>'
-Gt.desc = '''\
-Performs a greater-than comparison between two fixed-point registers, and returns
-the results of the test in a logical vector.
-'''
-fp_operators.append(Gt)
-
-## -----------------------------------------------------------------------------
-# IfElse1
-
-IfElse = FpOperator()
-IfElse.name = 'if_else1'
-IfElse.full_name = 'Vector blending'
-IfElse.signatures = ['vl if_else1 cv& cv&']
-IfElse.categories = [DocMisc]
-IfElse.desc = '''\
-Blend the inputs using the vector of logical as a first argument.
-Elements of the second input is taken when the corresponding elements from the vector
-of logicals is true, otherwise elements of the second input are taken.
-'''
-fp_operators.append(IfElse)
-
-## -----------------------------------------------------------------------------
-# Andb
-
-Andb = FpOperator()
-Andb.name = 'andb'
-Andb.full_name = 'Bitwise and'
-Andb.signatures = ['v andb cv& cv&']
-Andb.categories = [DocBitsOperators]
-Andb.desc = 'Bitwise and between two fixed-point SIMD registers.'
-fp_operators.append(Andb)
-
-## -----------------------------------------------------------------------------
-# Andnotb
-
-Andnotb = FpOperator()
-Andnotb.name = 'andnotb'
-Andnotb.full_name = 'Bitwise and not'
-Andnotb.signatures = ['v andnotb cv& cv&']
-Andnotb.categories = [DocBitsOperators]
-Andnotb.desc = 'Bitwise and not between two fixed-point SIMD registers.'
-fp_operators.append(Andnotb)
-
-## -----------------------------------------------------------------------------
-# Norb
-
-Notb = FpOperator()
-Notb.name = 'notb'
-Notb.full_name = 'Bitwise not'
-Notb.signatures = ['v notb cv&']
-Notb.categories = [DocBitsOperators]
-Notb.desc = 'Not operator on a fixed-point SIMD register.'
-fp_operators.append(Notb)
-
-## -----------------------------------------------------------------------------
-# Orb
-
-Orb = FpOperator()
-Orb.name = 'orb'
-Orb.full_name = 'Bitwise or'
-Orb.signatures = ['v orb cv& cv&']
-Orb.categories = [DocBitsOperators]
-Orb.desc = 'Bitwise or between two fixed-point SIMD registers.'
-fp_operators.append(Orb)
-
-## -----------------------------------------------------------------------------
-# Xorb
-Xorb = FpOperator()
-Xorb.name = 'xorb'
-Xorb.full_name = 'Bitwise xor'
-Xorb.signatures = ['v xorb cv& cv&']
-Xorb.categories = [DocBitsOperators]
-Xorb.desc = 'Bitwise xor between two fixed-point SIMD registers.'
-fp_operators.append(Xorb)
-
-## -----------------------------------------------------------------------------
-# Andl
-
-Andl = FpOperator()
-Andl.name = 'andl'
-Andl.full_name = 'Bitwise logical and'
-Andl.signatures = ['vl andl cvl& cvl&']
-Andl.categories = [DocLogicalOperators]
-Andl.desc = 'Bitwise and between two logical SIMD registers.'
-fp_operators.append(Andl)
-
-## -----------------------------------------------------------------------------
-# Andnotl
-
-Andnotl = FpOperator()
-Andnotl.name = 'andnotl'
-Andnotl.full_name = 'Bitwise and not'
-Andnotl.signatures = ['vl andnotl cvl& cvl&']
-Andnotl.categories = [DocLogicalOperators]
-Andnotl.desc = 'Bitwise and not between two logical SIMD registers.'
-fp_operators.append(Andnotl)
-
-## -----------------------------------------------------------------------------
-# Notl
-
-Notl = FpOperator()
-Notl.name = 'notl'
-Notl.full_name = 'Bitwise not'
-Notl.signatures = ['vl notb cvl&']
-Notl.categories = [DocLogicalOperators]
-Notl.desc = 'Not operator on a logical SIMD register.'
-fp_operators.append(Notl)
-
-## -----------------------------------------------------------------------------
-# Orl
-
-Orl = FpOperator()
-Orl.name = 'orl'
-Orl.full_name = 'Bitwise or'
-Orl.signatures = ['vl orb cvl& cvl&']
-Orl.categories = [DocLogicalOperators]
-Orl.desc = 'Bitwise or between two logical SIMD registers.'
-fp_operators.append(Orl)
-
-## -----------------------------------------------------------------------------
-# Xorl
-
-Xorl = FpOperator()
-Xorl.name = 'xorl'
-Xorl.full_name = 'Bitwise xor'
-Xorl.signatures = ['vl xorb cvl& cvl&']
-Xorl.categories = [DocLogicalOperators]
-Xorl.desc = 'Bitwise xor between two logical SIMD registers.'
-fp_operators.append(Xorl)
-
-## -----------------------------------------------------------------------------
-
-fp_operators = sorted(fp_operators, key=lambda op: op.name)
diff --git a/egg/modules/spmd/hatch.py b/egg/modules/spmd/hatch.py
index ecdb5853..d3169fc4 100644
--- a/egg/modules/spmd/hatch.py
+++ b/egg/modules/spmd/hatch.py
@@ -21,9 +21,8 @@
 
 import os
 import operators
 import common
-import cuda
 import gen_scalar_utilities
-#import hip
+import gen_tests as nsimd_tests
 
 # -----------------------------------------------------------------------------
 # CUDA: default number of threads per block
@@ -441,14 +440,6 @@ def gen_tests_for_shifts(opts, t, operator):
     filename = os.path.join(dirname, '{}.{}.cpp'.format(op_name, t))
     if not common.can_create_filename(opts, filename):
         return
-
-    if op_name in ['rec11', 'rsqrt11']:
-        comp = '!cmp(ref, out, n, .0009765625 /* = 2^-10 */)'
-    elif op_name in ['rec8', 'rsqrt8']:
-        comp = '!cmp(ref, out, n, .0078125 /* = 2^-7 */)'
-    else:
-        comp = '!cmp(ref, out, n)'
-
     with common.open_utf8(opts, filename) as out:
         out.write(
            '''#include
@@ -469,6 +460,8 @@ def gen_tests_for_shifts(opts, t, operator):
         kernel<<<{gpu_params}>>>(dst, a0, int(n), s);
     }}
 
+    {cbprng_cuda}
+
     #elif defined(NSIMD_ROCM)
 
     __global__ void kernel({typ} *dst, {typ} *a0, size_t n, int s) {{
@@ -482,6 +475,30 @@ def gen_tests_for_shifts(opts, t, operator):
         hipLaunchKernelGGL(kernel, {gpu_params}, 0, 0, dst, a0, n, s);
     }}
 
+    {cbprng_hip}
+
+    #elif defined(NSIMD_ONEAPI)
+
+    inline void kernel({typ} *dst, {typ} *a0, const size_t n,
+                       const int s, sycl::nd_item<1> item) {{
+      const size_t ii = item.get_global_id().get(0);
+      if (ii < n){{
+        dst[ii] = nsimd::gpu_{op_name}(a0[ii], s);
+      }}
+    }}
+
+    void compute_result({typ} *dst, {typ} *a0, size_t n, int s) {{
+      size_t total_num_threads = (size_t)nsimd_kernel_param((int)n, {tpb});
+      sycl::queue q_ = nsimd::oneapi::default_queue();
+      q_.parallel_for(sycl::nd_range<1>(sycl::range<1>(total_num_threads),
+                                        sycl::range<1>({tpb})),
+                      [=](sycl::nd_item<1> item){{
+                        kernel(dst, a0, n, s, item);
+                      }}).wait_and_throw();
+    }}
+
+    {cbprng_oneapi}
+
     #else
 
     void compute_result({typ} *dst, {typ} *a0, unsigned int n, int s) {{
@@ -490,13 +507,12 @@ def gen_tests_for_shifts(opts, t, operator):
       }}
     }}
 
+    {cbprng_cpu}
+
     #endif
 
     // clang-format off
 
-    nsimd_fill_dev_mem_func(prng7,
-        1 + (((unsigned int)i * 22328380 + 644295) % 7))
-
     spmd_kernel_1d(kernel, {typ} *dst, {typ} *a0, int s)
       k_store(dst, k_{op_name}(k_load(a0), s));
     spmd_kernel_end
t, operator): for (int s = 0; s < {typnbits}; s++) {{ int ret = 0; {typ} *a0 = nsimd::device_calloc<{typ}>(n); - prng7(a0, n); + random(a0, n, 0); {typ} *ref = nsimd::device_calloc<{typ}>(n); {typ} *out = nsimd::device_calloc<{typ}>(n); spmd_launch_kernel_1d(kernel, {typnbits}, 1, n, out, a0, s); compute_result(ref, a0, n, s); - if ({comp}) {{ + if (!cmp(ref, out, n)) {{ ret = -1; }} nsimd::device_free(a0); @@ -528,14 +544,21 @@ def gen_tests_for_shifts(opts, t, operator): }} return 0; }} - '''.format(typ=t, op_name=op_name, typnbits=t[1:], comp=comp, + '''.format(typ=t, op_name=op_name, typnbits=t[1:], tpb=tpb, + cbprng_cpu=nsimd_tests.cbprng(t, operator, 'cpu'), + cbprng_cuda=nsimd_tests.cbprng(t, operator, 'cuda', + gpu_params), + cbprng_hip=nsimd_tests.cbprng(t, operator, 'hip', + gpu_params), + cbprng_oneapi=nsimd_tests.cbprng(t, operator, 'oneapi', + ['(int)n', str(tpb)]), gpu_params=gpu_params)) common.clang_format(opts, filename, cuda=True) # ----------------------------------------------------------------------------- -def gen_tests_for_cvt_reinterpret(opts, t, tt, operator): +def gen_tests_for_cvt_reinterpret(opts, tt, t, operator): op_name = operator.name dirname = os.path.join(opts.tests_dir, 'modules', 'spmd') common.mkdir_p(dirname) @@ -564,6 +587,8 @@ def gen_tests_for_cvt_reinterpret(opts, t, tt, operator): kernel<<<{gpu_params}>>>(dst, a0, int(n)); }} + {cbprng_cuda} + #elif defined(NSIMD_ROCM) __global__ void kernel({typ} *dst, {typ} *a0, size_t n) {{ @@ -578,6 +603,31 @@ def gen_tests_for_cvt_reinterpret(opts, t, tt, operator): hipLaunchKernelGGL(kernel, {gpu_params}, 0, 0, dst, a0, n); }} + {cbprng_hip} + + #elif defined(NSIMD_ONEAPI) + + inline void kernel({typ} *dst, {typ} *a0, const size_t n, + sycl::nd_item<1> item) {{ + const size_t ii = item.get_global_id().get(0); + if (ii < n){{ + dst[ii] = nsimd::gpu_{op_name}({typ}(), nsimd::gpu_{op_name}( + {totyp}(), a0[ii])); + }} + }} + + void compute_result({typ} *dst, {typ} *a0, size_t n) {{ + size_t total_num_threads = (size_t)nsimd_kernel_param((int)n, {tpb}); + sycl::queue q_ = nsimd::oneapi::default_queue(); + q_.parallel_for(sycl::nd_range<1>(sycl::range<1>(total_num_threads), + sycl::range<1>({tpb})), + [=](sycl::nd_item<1> item){{ + kernel(dst, a0, n, item); + }}).wait_and_throw(); + }} + + {cbprng_oneapi} + #else void compute_result({typ} *dst, {typ} *a0, unsigned int n) {{ @@ -587,13 +637,12 @@ def gen_tests_for_cvt_reinterpret(opts, t, tt, operator): }} }} + {cbprng_cpu} + #endif // clang-format off - nsimd_fill_dev_mem_func(prng7, - 1 + (((unsigned int)i * 22328380 + 644295) % 7)) - spmd_kernel_1d(kernel, {typ} *dst, {typ} *a0) k_store(dst, k_{op_name}({k_typ}, k_{op_name}({k_totyp}, k_load(a0)))); @@ -607,7 +656,7 @@ def gen_tests_for_cvt_reinterpret(opts, t, tt, operator): unsigned int n = n_[i]; int ret = 0; {typ} *a0 = nsimd::device_calloc<{typ}>(n); - prng7(a0, n); + random(a0, n, 0); {typ} *ref = nsimd::device_calloc<{typ}>(n); {typ} *out = nsimd::device_calloc<{typ}>(n); spmd_launch_kernel_1d(kernel, {typnbits}, 1, n, out, a0); @@ -625,7 +674,13 @@ def gen_tests_for_cvt_reinterpret(opts, t, tt, operator): return 0; }} '''.format(typ=t, totyp=tt, op_name=op_name, typnbits=t[1:], - gpu_params=gpu_params, k_typ=k_typ[t[0]], + gpu_params=gpu_params, k_typ=k_typ[t[0]], tpb=tpb, + cbprng_cpu=nsimd_tests.cbprng(t, operator, 'cpu'), + cbprng_cuda=nsimd_tests.cbprng(t, operator, 'cuda'), + cbprng_hip=nsimd_tests.cbprng(t, operator, 'hip', + gpu_params), + cbprng_oneapi=nsimd_tests.cbprng(t, operator, 'oneapi', + 
['(int)n', str(tpb)]), k_totyp=k_typ[tt[0]])) common.clang_format(opts, filename, cuda=True) @@ -645,8 +700,7 @@ def gen_tests_for(opts, t, operator): k_call_args = ', '.join(['a{}'.format(i) for i in range(arity)]) fill_tabs = '\n'.join(['{typ} *a{i} = nsimd::device_calloc<{typ}>(n);\n' \ - 'prng{ip5}(a{i}, n);'. \ - format(typ=t, i=i, ip5=i + 5) \ + 'random(a{i}, n, {i});'.format(typ=t, i=i) \ for i in range(arity)]) free_tabs = '\n'.join(['nsimd::device_free(a{i});'. \ @@ -679,32 +733,44 @@ def spmd_load_code(param, typ, i): k_endif'''.format(op_name, args) # gpu - def get_cte_gpu(typ, cte): - if typ == 'f16': + def get_cte_gpu(typ, cte, target): + if typ == 'f16' and target == 'cuda_rocm': return '__float2half((f32){})'.format(cte) else: return '({}){}'.format(typ, cte) - def gpu_load_code(param, typ, i): + def gpu_load_code(param, typ, i, target): if param == 'l': - return 'nsimd::gpu_lt(a{}[i], {})'.format(i, get_cte_gpu(typ, 4)) + return 'nsimd::gpu_lt(a{}[i], {})'. \ + format(i, get_cte_gpu(typ, 4, target)) if param == 'v': return 'a{}[i]'.format(i) - args = ', '.join([gpu_load_code(operator.params[i + 1], t, i) \ - for i in range(arity)]) + args_cuda_rocm = ', '.join([gpu_load_code(operator.params[i + 1], t, i, + 'cuda_rocm') \ + for i in range(arity)]) + args_oneapi = ', '.join([gpu_load_code(operator.params[i + 1], t, i, + 'oneapi') for i in range(arity)]) if op_name == 'to_mask': - args = t + '(), ' + args + args_cuda_rocm = t + '(), ' + args_cuda_rocm + args_oneapi = t + '(), ' + args_oneapi if operator.params[0] == 'v': - gpu_kernel = 'dst[i] = nsimd::gpu_{}({});'.format(op_name, args) + cuda_rocm_kernel = 'dst[i] = nsimd::gpu_{}({});'. \ + format(op_name, args_cuda_rocm) + oneapi_kernel = 'dst[i] = nsimd::gpu_{}({});'. \ + format(op_name, args_oneapi) else: - gpu_kernel = '''if (nsimd::gpu_{op_name}({args})) {{ - dst[i] = {one}; - }} else {{ - dst[i] = {zero}; - }}'''.format(op_name=op_name, args=args, - one=get_cte_gpu(t, 1), - zero=get_cte_gpu(t, 0)) + tmpl = '''if (nsimd::gpu_{}({{}})) {{{{ + dst[i] = {{}}; + }}}} else {{{{ + dst[i] = {{}}; + }}}}'''.format(op_name) + cuda_rocm_kernel = tmpl.format(args_cuda_rocm, + get_cte_gpu(t, 1, 'cuda_rocm'), + get_cte_gpu(t, 0, 'cuda_rocm')) + oneapi_kernel = tmpl.format(args_oneapi, + get_cte_gpu(t, 1, 'oneapi'), + get_cte_gpu(t, 0, 'oneapi')) # cpu def get_cte_cpu(typ, cte): @@ -713,14 +779,14 @@ def get_cte_cpu(typ, cte): else: return '({}){}'.format(typ, cte) - def gpu_load_code(param, typ, i): + def cpu_load_code(param, typ, i): if param == 'l': return 'nsimd::scalar_lt(a{}[i], {})'. 
\ format(i, get_cte_cpu(typ, 4)) if param == 'v': return 'a{}[i]'.format(i) - args = ', '.join([gpu_load_code(operator.params[i + 1], t, i) \ + args = ', '.join([cpu_load_code(operator.params[i + 1], t, i) \ for i in range(arity)]) if op_name == 'to_mask': args = t + '(), ' + args @@ -735,12 +801,8 @@ def gpu_load_code(param, typ, i): one=get_cte_cpu(t, 1), zero=get_cte_cpu(t, 0)) - if op_name in ['rec11', 'rsqrt11']: - comp = '!cmp(ref, out, n, .0009765625 /* = 2^-10 */)' - elif op_name in ['rec8', 'rsqrt8']: - comp = '!cmp(ref, out, n, .0078125 /* = 2^-7 */)' - else: - comp = '!cmp(ref, out, n)' + comp = '!cmp(ref, out, n{})'.format('' if t in common.iutypes \ + else ', {}'.format(operator.ufp[t])) with common.open_utf8(opts, filename) as out: out.write( @@ -754,7 +816,7 @@ def gpu_load_code(param, typ, i): __global__ void kernel({typ} *dst, {k_args}, int n) {{ int i = threadIdx.x + blockIdx.x * blockDim.x; if (i < n) {{ - {gpu_kernel} + {cuda_rocm_kernel} }} }} @@ -762,12 +824,14 @@ def gpu_load_code(param, typ, i): kernel<<<{gpu_params}>>>(dst, {k_call_args}, int(n)); }} + {cbprng_cuda} + #elif defined(NSIMD_ROCM) __global__ void kernel({typ} *dst, {k_args}, size_t n) {{ size_t i = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; if (i < n) {{ - {gpu_kernel} + {cuda_rocm_kernel} }} }} @@ -776,6 +840,30 @@ def gpu_load_code(param, typ, i): n); }} + {cbprng_hip} + + #elif defined(NSIMD_ONEAPI) + + inline void kernel({typ} *dst, {k_args}, const size_t n, + sycl::nd_item<1> item) {{ + const size_t i = item.get_global_id().get(0); + if(i < n){{ + {oneapi_kernel} + }} + }} + + void compute_result({typ} *dst, {k_args}, size_t n) {{ + size_t total_num_threads = (size_t)nsimd_kernel_param((int)n, {tpb}); + sycl::queue q_ = nsimd::oneapi::default_queue(); + q_.parallel_for(sycl::nd_range<1>(sycl::range<1>(total_num_threads), + sycl::range<1>({tpb})), + [=](sycl::nd_item<1> item){{ + kernel(dst, {k_call_args}, n, item); + }}).wait_and_throw(); + }} + + {cbprng_oneapi} + #else void compute_result({typ} *dst, {k_args}, unsigned int n) {{ @@ -784,24 +872,19 @@ def gpu_load_code(param, typ, i): }} }} + {cbprng_cpu} + #endif // clang-format off - nsimd_fill_dev_mem_func(prng5, - 1 + (((unsigned int)i * 69342380 + 414585) % 5)) - nsimd_fill_dev_mem_func(prng6, - 1 + (((unsigned int)i * 12528380 + 784535) % 6)) - nsimd_fill_dev_mem_func(prng7, - 1 + (((unsigned int)i * 22328380 + 644295) % 7)) - spmd_kernel_1d(kernel, {typ} *dst, {k_args}) {k_code} spmd_kernel_end // clang-format on - #if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) + #if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) || defined(NSIMD_ONEAPI) #define THREADS_PER_BLOCK 128 #else #define THREADS_PER_BLOCK 1 @@ -832,8 +915,17 @@ def gpu_load_code(param, typ, i): }} '''.format(typ=t, free_tabs=free_tabs, fill_tabs=fill_tabs, k_code=k_code, k_call_args=k_call_args, k_args=k_args, - cpu_kernel=cpu_kernel, gpu_kernel=gpu_kernel, comp=comp, - gpu_params=gpu_params, typnbits=t[1:])) + cpu_kernel=cpu_kernel, comp=comp, + cuda_rocm_kernel=cuda_rocm_kernel, + oneapi_kernel=oneapi_kernel, + cbprng_cpu=nsimd_tests.cbprng(t, operator, 'cpu'), + cbprng_cuda=nsimd_tests.cbprng(t, operator, 'cuda', + gpu_params), + cbprng_hip=nsimd_tests.cbprng(t, operator, 'hip', + gpu_params), + cbprng_oneapi=nsimd_tests.cbprng(t, operator, 'oneapi', + ['(int)n', str(tpb)]), + gpu_params=gpu_params, typnbits=t[1:], tpb=tpb)) common.clang_format(opts, filename, cuda=True) @@ -841,22 +933,13 @@ def gen_tests(opts): for op_name, operator in operators.operators.items(): if not 
operator.has_scalar_impl: continue - not_closed = (operator.output_to == common.OUTPUT_TO_SAME_SIZE_TYPES \ or ('v' not in operator.params[1:] and 'l' not in operator.params[1:])) - for t in operator.types: - - if operator.name in ['notb', 'andb', 'xorb', 'orb', - 'andnotb'] and t == 'f16': - continue - tts = common.get_output_types(t, operator.output_to) - for tt in tts: - if t == 'f16' and op_name in ['notb', 'andnotb', 'orb', - 'xorb', 'andb']: + if not nsimd_tests.should_i_do_the_test(operator, tt, t): continue if operator.name in ['shl', 'shr', 'shra']: gen_tests_for_shifts(opts, t, operator) @@ -952,7 +1035,8 @@ def v_call_arg(typ, i): s_tmpl = 'template <{}>'.format(s_tmpl) functions += \ - '''#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) + '''#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) || \ + defined(NSIMD_ONEAPI) {signature} nsimd::gpu_{s_op_name}({m_call_args_gpu}) @@ -1016,7 +1100,7 @@ def desc(): return '''SPMD programming allows the programmer to focus on kernels and the compiler to vectorize kernel code more effectively. Basically this module provides a "à la CUDA" programming C++ DSL to targets CPU SIMD as well -as NVIDIA and AMD GPUs.''' +as Intel, NVIDIA and AMD GPUs.''' def doc_menu(): return {'Overview': 'overview', 'API reference': 'api'} diff --git a/egg/modules/tet1d/hatch.py b/egg/modules/tet1d/hatch.py index 10ee3764..fcab4844 100644 --- a/egg/modules/tet1d/hatch.py +++ b/egg/modules/tet1d/hatch.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 Agenium Scale +# Copyright (c) 2021 Agenium Scale # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,9 +21,8 @@ import os import operators import common -import cuda import gen_scalar_utilities -#import hip +import gen_tests as nsimd_tests # ----------------------------------------------------------------------------- # CUDA: default number of threads per block @@ -308,6 +307,8 @@ def gen_tests_for_shifts(opts, t, operator): kernel<<<{gpu_params}>>>(dst, tab0, int(n), s); }} + {cbprng_cuda} + #elif defined(NSIMD_ROCM) __global__ void kernel({t} *dst, {t} *tab0, size_t n, int s) {{ @@ -321,6 +322,30 @@ def gen_tests_for_shifts(opts, t, operator): hipLaunchKernelGGL(kernel, {gpu_params}, 0, 0, dst, tab0, n, s); }} + {cbprng_hip} + + #elif defined(NSIMD_ONEAPI) + + inline void kernel({t} *dst, {t} *tab0, const size_t n, + const int s, sycl::nd_item<1> item) {{ + size_t ii = item.get_global_id().get(0); + if (ii < n){{ + dst[ii] = nsimd::gpu_{op_name}(tab0[ii], s); + }} + }} + + void compute_result({t} *dst, {t} *tab0, size_t n, int s) {{ + size_t total_num_threads = (size_t)nsimd_kernel_param((int)n, {tpb}); + sycl::queue q_ = nsimd::oneapi::default_queue(); + q_.parallel_for(sycl::nd_range<1>(sycl::range<1>(total_num_threads), + sycl::range<1>({tpb})), + [=](sycl::nd_item<1> item){{ + kernel(dst, tab0, n, s, item); + }}).wait_and_throw(); + }} + + {cbprng_oneapi} + #else void compute_result({t} *dst, {t} *tab0, unsigned int n, int s) {{ @@ -329,10 +354,9 @@ def gen_tests_for_shifts(opts, t, operator): }} }} - #endif + {cbprng_cpu} - nsimd_fill_dev_mem_func(prng5, - 1 + (((unsigned int)i * 69342380 + 414585) % 5)) + #endif int main() {{ unsigned int n_[3] = {{ 10, 1001, 10001 }}; @@ -341,7 +365,7 @@ def gen_tests_for_shifts(opts, t, operator): for (int s = 0; s < {typnbits}; s++) {{ int ret = 0; {t} *tab0 = nsimd::device_calloc<{t}>(n); - prng5(tab0, n); + random(tab0, n, 0); {t} *ref = nsimd::device_calloc<{t}>(n); 
{t} *out = nsimd::device_calloc<{t}>(n); compute_result(ref, tab0, n, s); @@ -360,10 +384,17 @@ def gen_tests_for_shifts(opts, t, operator): return 0; }} '''.format(gpu_params=gpu_params, op_name=op_name, t=t, - typnbits=t[1:])) + typnbits=t[1:], tpb=tpb, + cbprng_cpu=nsimd_tests.cbprng(t, operator, 'cpu'), + cbprng_cuda=nsimd_tests.cbprng(t, operator, 'cuda', + gpu_params), + cbprng_hip=nsimd_tests.cbprng(t, operator, 'hip', + gpu_params), + cbprng_oneapi=nsimd_tests.cbprng(t, operator, 'oneapi', + ['(int)n', str(tpb)]))) common.clang_format(opts, filename, cuda=True) -def gen_tests_for(opts, t, tt, operator): +def gen_tests_for(opts, tt, t, operator): op_name = operator.name dirname = os.path.join(opts.tests_dir, 'modules', 'tet1d') common.mkdir_p(dirname) @@ -384,8 +415,7 @@ def gen_tests_for(opts, t, tt, operator): for i in range(arity)]) fill_tabs = '\n'.join(['{typ} *tab{i} = nsimd::device_calloc<{typ}>(n);\n' \ - 'prng{ip5}(tab{i}, n);'. \ - format(typ=t, i=i, ip5=i + 5) \ + 'random(tab{i}, n, {i});'.format(typ=t, i=i) \ for i in range(arity)]) free_tabs = '\n'.join(['nsimd::device_free(tab{i});'. \ @@ -497,16 +527,15 @@ def gen_tests_for(opts, t, tt, operator): cpu_kernel = compute_result_kernel.format(p='scalar', f32_to_f16='nsimd_f32_to_f16', f16_to_f32='nsimd_f16_to_f32') - gpu_kernel = compute_result_kernel.format(p='gpu', - f32_to_f16='__float2half', - f16_to_f32='__half2float') - - if op_name in ['rec11', 'rsqrt11']: - comp = '!cmp(ref, out, n, .0009765625 /* = 2^-10 */)' - elif op_name in ['rec8', 'rsqrt8']: - comp = '!cmp(ref, out, n, .0078125 /* = 2^-7 */)' - else: - comp = '!cmp(ref, out, n)' + cuda_rocm_kernel = compute_result_kernel.format(p='gpu', + f32_to_f16='__float2half', + f16_to_f32='__half2float') + oneapi_kernel = compute_result_kernel.format(p='gpu', + f32_to_f16='(f16)', + f16_to_f32='(f32)') + + comp = '!cmp(ref, out, n{})'.format('' if t in common.iutypes \ + else ', {}'.format(operator.ufp[t])) with common.open_utf8(opts, filename) as out: out.write( @@ -519,7 +548,7 @@ def gen_tests_for(opts, t, tt, operator): __global__ void kernel({typ} *dst, {args_tabs}, int n) {{ int i = threadIdx.x + blockIdx.x * blockDim.x; if (i < n) {{ - {gpu_kernel} + {cuda_rocm_kernel} }} }} @@ -527,12 +556,14 @@ def gen_tests_for(opts, t, tt, operator): kernel<<<{gpu_params}>>>(dst, {args_tabs_call}, int(n)); }} + {cbprng_cuda} + #elif defined(NSIMD_ROCM) __global__ void kernel({typ} *dst, {args_tabs}, size_t n) {{ size_t i = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; if (i < n) {{ - {gpu_kernel} + {cuda_rocm_kernel} }} }} @@ -541,23 +572,41 @@ def gen_tests_for(opts, t, tt, operator): n); }} + {cbprng_hip} + + #elif defined(NSIMD_ONEAPI) + + inline void kernel({typ} *dst, {args_tabs}, const size_t n, + sycl::nd_item<1> item) {{ + size_t i = item.get_global_id().get(0); + if (i < n) {{ + {oneapi_kernel} + }} + }} + + void compute_result({typ} *dst, {args_tabs}, const size_t n) {{ + size_t total_num_threads = (size_t)nsimd_kernel_param((int)n, {tpb}); + sycl::queue q_ = nsimd::oneapi::default_queue(); + q_.parallel_for(sycl::nd_range<1>(sycl::range<1>(total_num_threads), + sycl::range<1>({tpb})), + [=](sycl::nd_item<1> item){{ + kernel(dst, {args_tabs_call}, n, item); + }}).wait_and_throw(); + }} + + {cbprng_oneapi} + #else - void compute_result({typ} *dst, {args_tabs}, - unsigned int n) {{ + void compute_result({typ} *dst, {args_tabs}, unsigned int n) {{ for (unsigned int i = 0; i < n; i++) {{ {cpu_kernel} }} }} - #endif + {cbprng_cpu} - nsimd_fill_dev_mem_func(prng5, - 1 
+ (((unsigned int)i * 69342380 + 414585) % 5))
-          nsimd_fill_dev_mem_func(prng6,
-              1 + (((unsigned int)i * 12528380 + 784535) % 6))
-          nsimd_fill_dev_mem_func(prng7,
-              1 + (((unsigned int)i * 22328380 + 644295) % 7))
+          #endif

       int main() {{
         unsigned int n_[3] = {{ 10, 1001, 10001 }};
@@ -584,7 +633,16 @@ def gen_tests_for(opts, t, tt, operator):
           '''.format(typ=t, args_tabs=args_tabs, fill_tabs=fill_tabs,
                      args_tabs_call=args_tabs_call, gpu_params=gpu_params,
                      free_tabs=free_tabs, tet1d_code=tet1d_code, comp=comp,
-                     cpu_kernel=cpu_kernel, gpu_kernel=gpu_kernel))
+                     cpu_kernel=cpu_kernel, tpb=tpb,
+                     cuda_rocm_kernel=cuda_rocm_kernel,
+                     oneapi_kernel=oneapi_kernel,
+                     cbprng_cpu=nsimd_tests.cbprng(t, operator, 'cpu'),
+                     cbprng_cuda=nsimd_tests.cbprng(t, operator, 'cuda',
+                                                    gpu_params),
+                     cbprng_hip=nsimd_tests.cbprng(t, operator, 'hip',
+                                                   gpu_params),
+                     cbprng_oneapi=nsimd_tests.cbprng(t, operator, 'oneapi',
+                                                      ['(int)n', str(tpb)])))

     common.clang_format(opts, filename, cuda=True)

@@ -592,14 +650,10 @@ def gen_tests(opts):
     for op_name, operator in operators.operators.items():
         if not operator.has_scalar_impl:
             continue
-
         for t in operator.types:
-
             tts = common.get_output_types(t, operator.output_to)
             for tt in tts:
-                if t == 'f16' and op_name in ['notb', 'andnotb', 'orb',
-                                              'xorb', 'andb']:
+                if not nsimd_tests.should_i_do_the_test(operator, tt, t):
                     continue
                 if operator.name in ['shl', 'shr', 'shra']:
                     gen_tests_for_shifts(opts, t, operator)
@@ -718,6 +772,10 @@ def gen_functions(opts):
               __device__ {return_type} gpu_get(nsimd::nat i) const {{
                 {impl_gpu}
               }}
+              #elif defined(NSIMD_ONEAPI)
+              {return_type} gpu_get(nsimd::nat i) const {{
+                {impl_gpu}
+              }}
               #else
               {return_type} scalar_get(nsimd::nat i) const {{
                 {impl_scalar}
               }}
@@ -761,7 +819,8 @@ def gen_functions(opts):
                  node<{op_name}_t, node, node::in_type>, none_t>
-                 operator{cxx_operator}(node const &node, T a) {{
+                 operator{cxx_operator}(node const &node,
+                                        T a) {{
                    typedef typename tet1d::node::in_type S;
                    return tet1d::{op_name}(node, literal_to::impl(a));
                  }}
@@ -771,7 +830,8 @@ def gen_functions(opts):
                  node<{op_name}_t, node::in_type>, node, none_t>
-                 operator{cxx_operator}(T a, node const &node) {{
+                 operator{cxx_operator}(T a,
+                                        node const &node) {{
                    typedef typename tet1d::node::in_type S;
                    return tet1d::{op_name}(literal_to::impl(a), node);
                  }}
@@ -815,7 +875,7 @@ def name():

 def desc():
     return '''This module provides a thin layer of expression templates above
-NSIMD core. It also allows the programmer to target NVIDIA and AMD GPUs.
+NSIMD core. It also allows the programmer to target Intel, NVIDIA and AMD GPUs.
 Expression templates are a C++ technique that allows the programmer to write
 code "à la MATLAB" where variables usually represent vectors and operators
 apply itemwise.'''
diff --git a/egg/oneapi.py b/egg/oneapi.py
new file mode 100644
index 00000000..7fb91bef
--- /dev/null
+++ b/egg/oneapi.py
@@ -0,0 +1,402 @@
+
+# Copyright (c) 2021 Agenium Scale
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# ----------------------------------------------------------------------------- +# References: + +# Functions: book: +# Data Parallel C++ +# Mastering DPC++ for Programming of Heterogeneous Systems using +# C++ and SYCL - Apress Open +# Table page 475: list of maths functions. float16 supported + +# sycl half type (f16) API: +# https://mmha.github.io/syclreference/libraries/types/half/ +# ----------------------------------------------------------------------------- + +import common +import scalar + +fmtspec = dict() + +# ----------------------------------------------------------------------------- + +def get_impl_f16(operator, totyp, typ): + + # Case 1: rounding functions + # no sycl function available for half type + # sycl function available for f32 + # use sycl defined conversions half --> f32 , f32 --> half + + # Case 2: no sycl function available for half type + # sycl function available for f32 + # use nsimd casts f32-->f16 + sycl function + f16-->f32 + + no_sycl_avail_f16_cast_use_sycl_f32 = \ + ['fma', 'fms', 'fnma', 'fnms', 'min', 'max', 'abs'] + + # Case 3: sycl provides functions supporting half type + + sycl_avail_functions_f16 = \ + ['rec', 'rec8', 'rec11', 'rsqrt8', 'rsqrt11', 'rsqrt', 'sqrt'] + + # Case 4: sycl half's type provided comparison operators + # Note: + # not documented in the book + # source: sycl half type (f16) API: + # https://mmha.github.io/syclreference/libraries/types/half/ + + sycl_avail_cmp_op_f16 = { + 'lt': 'return {in0} < {in1};', + 'gt': 'return {in0} > {in1};', + 'le': 'return {in0} <= {in1};', + 'ge': 'return {in0} >= {in1};', + 'ne': 'return {in0} != {in1};', + 'eq': 'return {in0} == {in1};' + } + + # Case 5: no sycl function available for any type + # use nsimd_scalar_[operator]_f16 + + # Dispatch + + # Case 1 + if operator.name in ['floor','ceil','trunc']: + return 'return f16(sycl::{op}(static_cast({in0})));'.\ + format(op=operator.name,**fmtspec) + elif operator.name == 'round_to_even': + return 'return f16(sycl::rint(static_cast({in0})));'.\ + format(**fmtspec) + + # Case 2 + elif operator.name in no_sycl_avail_f16_cast_use_sycl_f32: + if operator.name in ['fma', 'fms', 'fnma', 'fnms']: + neg = '-' if operator.name in ['fnma', 'fnms'] else '' + op = '-' if operator.name in ['fnms', 'fms'] else '' + return '''// cl::sycl::half::operator float + f32 x0 = static_cast({in0}); + f32 x1 = static_cast({in1}); + f32 x2 = static_cast({in2}); + f32 res = sycl::fma({neg}x0, x1, {op}x2); + // cl::sycl::half::half(const float& f) + return f16(res);'''.format(neg=neg, op=op, **fmtspec) + elif operator.name in ['min', 'max']: + op = 'fmin' if operator.name == 'min' else 'fmax' + return '''// cl::sycl::half::operator float + f32 x0 = static_cast({in0}); + f32 x1 = static_cast({in1}); + f32 res = sycl::{op}(x0, x1); + // cl::sycl::half::half(const float& f) + return f16(res);'''.format(op=op, **fmtspec) + elif operator.name == 'abs': + return '''// cl::sycl::half::operator float + f32 x0 = static_cast({in0}); + f32 res = sycl::fabs(x0); + // cl::sycl::half::half(const float& f) + 
return f16(res);'''.format(**fmtspec) + + # Case 3 + elif operator.name in sycl_avail_functions_f16: + if operator.name in ['rec8', 'rec11', 'rec']: + return '''// sycl::recip available in native form only + // availability in half-precision + return f16(1.0f / {in0});'''.format(**fmtspec) + elif operator.name in ['rsqrt8', 'rsqrt11', 'rsqrt']: + return 'return sycl::rsqrt({in0});'.format(**fmtspec) + elif operator.name == 'sqrt': + return 'return sycl::sqrt({in0});'.format(**fmtspec) + + # Case 4 + elif operator.name in sycl_avail_cmp_op_f16: + return sycl_avail_cmp_op_f16[operator.name].format(**fmtspec) + + # Case 5 + else: + args = ', '.join(['{{in{}}}'.format(i).format(**fmtspec) \ + for i in range(len(operator.params[1:]))]) + return 'return nsimd_scalar_{op}_f16({args});'.\ + format(op=operator.name, args=args) + +# ----------------------------------------------------------------------------- + +def reinterpret(totyp, typ): + if typ == totyp: + return 'return {in0};'.format(**fmtspec) + elif ((typ in common.ftypes and totyp in common.iutypes) or \ + (typ in common.iutypes and totyp in common.ftypes)): + return 'return nsimd_scalar_reinterpret_{totyp}_{typ}({in0});'. \ + format(**fmtspec) + else: + return '''{totyp} ret; + memcpy((void *)&ret, (void *)&{in0}, sizeof({in0})); + return ret;'''.format(**fmtspec) + +# ----------------------------------------------------------------------------- + +def get_impl(operator, totyp, typ): + + global fmtspec + + fmtspec = { + 'in0': common.in0, + 'in1': common.in1, + 'in2': common.in2, + 'typ': typ, + 'totyp': totyp, + 'typnbits': typ[1:] + } + + # src operators + if operator.src: + oneapi_ops = { + 'sin_u35': 'sin', + 'cos_u35': 'cos', + 'tan_u35': 'tan', + 'asin_u35': 'asin', + 'acos_u35': 'acos', + 'atan_u35': 'atan', + 'atan2_u35': 'atan2', + 'log_u35': 'log', + 'cbrt_u35': 'cbrt', + 'sin_u10': 'sin', + 'cos_u10': 'cos', + 'tan_u10': 'tan', + 'asin_u10': 'asin', + 'acos_u10': 'acos', + 'atan_u10': 'atan', + 'atan2_u10': 'atan2', + 'log_u10': 'log', + 'cbrt_u10': 'cbrt', + 'exp_u10': 'exp', + 'pow_u10': 'pow', + 'sinh_u10': 'sinh', + 'cosh_u10': 'cosh', + 'tanh_u10': 'tanh', + 'sinh_u35': 'sinh', + 'cosh_u35': 'cosh', + 'tanh_u35': 'tanh', + 'fastsin_u3500': 'sin', + 'fastcos_u3500': 'cos', + 'fastpow_u3500': 'pow', + 'asinh_u10': 'asinh', + 'acosh_u10': 'acosh', + 'atanh_u10': 'atanh', + 'exp2_u10': 'exp2', + 'exp2_u35': 'exp2', + 'exp10_u10': 'exp10', + 'exp10_u35': 'exp10', + 'expm1_u10': 'expm1', + 'log10_u10': 'log10', + 'log2_u10': 'log2', + 'log2_u35': 'log2', + 'log1p_u10': 'log1p', + 'sinpi_u05': 'sinpi', + 'cospi_u05': 'cospi', + 'hypot_u05': 'hypot', + 'hypot_u35': 'hypot', + 'remainder': 'remainder', + 'fmod': 'fmod', + 'lgamma_u10': 'lgamma', + 'tgamma_u10': 'tgamma', + 'erf_u10': 'erf', + 'erfc_u15': 'erfc' + } + return 'return cl::sycl::{}({});'.format( + oneapi_ops[operator.name], + common.get_args(len(operator.params[1:]))) + + # bool first, no special treatment for f16's + bool_operators = [ 'andl', 'orl', 'xorl', 'andnotl', 'notl' ] + if operator.name in bool_operators: + if operator.name == 'notl': + return 'return nsimd_scalar_{op}({in0});'.\ + format(op=operator.name,**fmtspec) + else: + return 'return nsimd_scalar_{op}({in0}, {in1});'.\ + format(op=operator.name,**fmtspec) + + # infix operators no special treatment for f16's + infix_operators = [ 'orb', 'andb', 'andnotb', 'notb', 'xorb' ] + if operator.name in infix_operators: + if operator.name == 'notb': + return 'return nsimd_scalar_{op}_{typ}({in0});'.\ + 
format(op=operator.name,**fmtspec) + else: + return 'return nsimd_scalar_{op}_{typ}({in0}, {in1});'.\ + format(op=operator.name,**fmtspec) + + # reinterpret + if operator.name == 'reinterpret': + return reinterpret(totyp, typ) + + # cvt + if operator.name == 'cvt': + if 'f16' == totyp: + # conversion op: takes in a 32 bit float and converts it to 16 bits + return 'return sycl::half(static_cast({in0}));'. \ + format(**fmtspec) + else: + return 'return nsimd_scalar_cvt_{totyp}_{typ}({in0});'. \ + format(**fmtspec) + + # to_mask + if operator.name == 'to_mask': + return 'return nsimd_scalar_to_mask_{totyp}({in0});'.format(**fmtspec) + + # to_logical + if operator.name == 'to_logical': + return 'return nsimd_scalar_to_logical_{typ}({in0});'.format(**fmtspec) + + # for all other operators, f16 has a special treatment + if typ == 'f16': + return get_impl_f16(operator, totyp, typ) + + # infix operators - rec - f32, f64 + infix_op_rec_ftypes = ['rec', 'rec8', 'rec11'] + + if typ in common.ftypes_no_f16 and operator.name in infix_op_rec_ftypes: + return '''// sycl::recip available in native form only + return 1.0{f} / {in0};'''. \ + format(f='f' if typ == 'f32' else '', **fmtspec) + + # infix operators - cmp - f32, f64 + infix_op_cmp_f32_f64 = { + 'lt': 'return {cast_to_int}sycl::isless({in0}, {in1});', + 'gt': 'return {cast_to_int}sycl::isgreater({in0}, {in1});', + 'le': 'return {cast_to_int}sycl::islessequal({in0}, {in1});', + 'ge': 'return {cast_to_int}sycl::isgreaterequal({in0}, {in1});', + 'ne': 'return {cast_to_int}sycl::isnotequal({in0}, {in1});', + 'eq': 'return {cast_to_int}sycl::isequal({in0}, {in1});' + } + + if typ in common.ftypes_no_f16 and operator.name in infix_op_cmp_f32_f64: + return infix_op_cmp_f32_f64[operator.name]. \ + format(cast_to_int='(int)' if typ == 'f64' else '', **fmtspec) + + # infix operators - cmp - integer types + infix_op_cmp_iutypes = [ 'lt', 'gt', 'le', 'ge', 'ne', 'eq' ] + if operator.name in infix_op_cmp_iutypes: + return 'return nsimd_scalar_{op}_{typ}({in0},{in1});'.\ + format(op=operator.name, **fmtspec) + + # infix operators f32, f64 + integers + # ref: see Data Parallel C++ book, pages 480, 481, 482 + # TODO: do the functions below call instrinsics/built-in + # functions on the device? + # 'add': 'return std::plus<{typ}>()({in0}, {in1});', + # 'sub': 'return std::minus<{typ}>()({in0}, {in1});', + # 'mul': 'return std::multiplies<{typ}>()({in0}, {in1});', + # 'div': 'return std::divides<{typ}>()({in0}, {in1});', + + infix_op_t = [ 'add', 'sub', 'mul', 'div' ] + if operator.name in infix_op_t: + return 'return nsimd_scalar_{op}_{typ}({in0}, {in1});'. \ + format(op=operator.name, **fmtspec) + + # neg + # ref: see Data Parallel C++ book, pages 480, 481, 482 + # TODO: does the function below call an instrinsic/built-in + # function on the device? + # 'neg': 'return std::negate<{typ}>()({in0});' + + if operator.name == 'neg': + return 'return nsimd_scalar_{op}_{typ}({in0});'. \ + format(op=operator.name, **fmtspec) + + # shifts + shifts_op_ui_t = [ 'shl', 'shr', 'shra' ] + if operator.name in shifts_op_ui_t and typ in common.iutypes: + return 'return nsimd_scalar_{op}_{typ}({in0}, {in1});'. \ + format(op=operator.name, **fmtspec) + + # adds + if operator.name == 'adds': + if typ in common.ftypes: + return 'return nsimd_scalar_add_{typ}({in0}, {in1});'. 
\ + format(**fmtspec) + else: + return 'return sycl::add_sat({in0}, {in1});'.format(**fmtspec) + + # subs + if operator.name == 'subs': + if typ in common.ftypes: + return 'return nsimd_scalar_sub_{typ}({in0}, {in1});'. \ + format(**fmtspec) + else: + return 'return sycl::sub_sat({in0}, {in1});'.format(**fmtspec) + + # fma's + if operator.name in ['fma', 'fms', 'fnma', 'fnms']: + if typ in common.ftypes: + neg = '-' if operator.name in ['fnma', 'fnms'] else '' + op = '-' if operator.name in ['fnms', 'fms'] else '' + return 'return sycl::fma({neg}{in0}, {in1}, {op}{in2});'. \ + format(op=op, neg=neg, **fmtspec) + else: + return 'return nsimd_scalar_{op}_{typ}({in0}, {in1}, {in2});'. \ + format(op=operator.name, **fmtspec) + + # other operators + # round_to_even, ceil, floor, trunc, min, max, abs, sqrt + + # round_to_even + if operator.name == 'round_to_even': + if typ in common.ftypes_no_f16: + return 'return sycl::rint({in0});'.format(**fmtspec) + else: + return 'return {in0};'.format(**fmtspec) + + # other rounding operators + other_rounding_ops = ['ceil', 'floor', 'trunc'] + if operator.name in other_rounding_ops: + if typ in common.iutypes: + return 'return nsimd_scalar_{op}_{typ}({in0});'. \ + format(op=operator.name, **fmtspec) + else: + return 'return sycl::{op}({in0});'. \ + format(op=operator.name, **fmtspec) + + # min/max + if operator.name in ['min', 'max']: + if typ in common.iutypes: + return 'return sycl::{op}({in0}, {in1});'.\ + format(op=operator.name, **fmtspec) + else: + op = 'sycl::fmin' if operator.name == 'min' else 'sycl::fmax' + return 'return {op}({in0}, {in1});'.format(op=op, **fmtspec) + + # abs + if operator.name == 'abs': + if typ in common.itypes: + return 'return ({typ})sycl::abs({in0});'.format(**fmtspec) + elif typ in common.utypes: + return 'return nsimd_scalar_abs_{typ}({in0});'.format(**fmtspec) + else: + return 'return sycl::fabs({in0});'.format(**fmtspec) + + # sqrt + if operator.name == 'sqrt' and typ in common.ftypes: + return 'return sycl::sqrt({in0});'.format(**fmtspec) + + # rsqrt + if operator.name in ['rsqrt8', 'rsqrt11', 'rsqrt'] and typ in common.ftypes: + return 'return sycl::rsqrt({in0});'.format(**fmtspec) + diff --git a/egg/operators.py b/egg/operators.py index 686f5656..58c185d0 100644 --- a/egg/operators.py +++ b/egg/operators.py @@ -1,7 +1,7 @@ # Use utf-8 encoding # -*- coding: utf-8 -*- -# Copyright (c) 2019 Agenium Scale +# Copyright (c) 2021 Agenium Scale # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -23,10 +23,8 @@ if __name__ == 'operators': import common - from common import Domain else: from . import common - from .common import Domain import collections # ----------------------------------------------------------------------------- @@ -159,9 +157,19 @@ def get_member_value(member): else: dct['autogen_cxx_adv'] = True - # Fill domain, default is R + # By default tests are done on random numbers depending on the type + # but sometimes one needs to produce only integers even if the + # type is a floating point type. + if 'tests_on_integers_only' not in dct: + dct['tests_on_integers_only'] = False; + + # Fill domain, default is [-20 ; +20] if 'domain' not in dct: - dct['domain'] = Domain('R') + dct['domain'] = [[-20, 20], [-20, 20], [-20, 20]] + + # Number of UFP (cf. 
documentation) for testing + if 'ufp' not in dct: + dct['ufp'] = {'f16': 8, 'f32': 18, 'f64': 45} # Check that params is not empty if len(dct['params']) == 0: @@ -176,13 +184,11 @@ def get_member_value(member): if 'desc' not in dct: arg = 'arguments' if len(dct['params']) > 2 else 'argument' if dct['params'][0] == '_': - dct['desc'] = \ - '{} the {}. Defined over {}.'. \ - format(dct['full_name'].capitalize(), arg, dct['domain']) + dct['desc'] = '{} the {}.'. \ + format(dct['full_name'].capitalize(), arg) else: - dct['desc'] = \ - 'Returns the {} of the {}. Defined over {}.'.\ - format(dct['full_name'], arg, dct['domain']) + dct['desc'] = 'Returns the {} of the {}.'.\ + format(dct['full_name'], arg) # Fill src, default is operator is in header not in source if not member_is_defined('src'): @@ -201,7 +207,7 @@ def get_member_value(member): 'vx4' in dct['params'] or \ dct['output_to'] in [common.OUTPUT_TO_UP_TYPES, common.OUTPUT_TO_DOWN_TYPES] or \ - dct['load_store'] or get_member_value('src'): + dct['load_store']: dct['has_scalar_impl'] = False else: dct['has_scalar_impl'] = True @@ -213,12 +219,12 @@ def get_member_value(member): class Operator(object, metaclass=MAddToOperators): # Default values (for general purpose) - domain = Domain('R') cxx_operator = None autogen_cxx_adv = True output_to = common.OUTPUT_TO_SAME_TYPE types = common.types params = [] + aliases = [] signature = '' # Enable bench by default @@ -235,10 +241,6 @@ class Operator(object, metaclass=MAddToOperators): bench_auto_against_std = False use_for_parsing = True - # Defaults values (for tests) - tests_mpfr = False - tests_ulps = {} - @property def returns(self): return self.params[0] @@ -303,6 +305,8 @@ def get_fmtspec(self, t, tt, simd_ext): ret['name'] = self.name ret['hbar'] = common.hbar ret['simd_ext'] = simd_ext + if self.src and 'sleef_symbol_prefix' in self.__class__.__dict__: + ret['sleef_symbol_prefix'] = self.sleef_symbol_prefix return ret def get_generic_signature(self, lang): @@ -314,6 +318,12 @@ def get_generic_signature(self, lang): args=args), '#define v{name}_e({args}, simd_ext)'. \ format(name=self.name, args=args)] + elif lang == 'c_adv': + args = ['a{}'.format(i - 1) for i in range(1, len(self.params))] + if not self.closed: + args = ['to_type'] + args + args = ', '.join(args) + return '#define nsimd_{}({})'.format(self.name, args) elif lang == 'cxx_base': def get_type(param, typename): if param == '_': @@ -547,7 +557,7 @@ def get_scalar_signature(self, cpu_gpu, t, tt, lang): sig = '__device__ ' if cpu_gpu == 'gpu' else '' sig += common.get_one_type_scalar(self.params[0], tt) + ' ' func_name = 'nsimd_' if lang == 'c' else '' - func_name += 'gpu_' if cpu_gpu == 'gpu' else 'scalar_' + func_name += 'gpu_' if cpu_gpu in ['gpu', 'oneapi'] else 'scalar_' func_name += self.name operator_on_logicals = (self.params == ['l'] * len(self.params)) if lang == 'c' and not operator_on_logicals: @@ -573,7 +583,6 @@ class SrcOperator(Operator): class Len(Operator): full_name = 'vector length' signature = 'p len' - domain = Domain('') categories = [DocMisc] class Set1(Operator): @@ -698,7 +707,6 @@ class MaskStoreu1(Operator): class Store2u(Operator): signature = '_ store2u * v v' load_store = True - domain = Domain('RxR') categories = [DocLoadStore] desc = 'Store 2 SIMD vectors as array of structures of 2 members into ' + \ 'unaligned memory.' 
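
The ufp member introduced above replaces the old per-operator tests_ulps
thresholds: for floating point types the generated tests now call
cmp(ref, out, n, ufp) with per-type defaults (8 for f16, 18 for f32, 45 for
f64). A minimal sketch of what such a unit-in-the-first-place criterion can
look like, assuming it bounds the relative distance by 2^(-ufp); cmp_ufp
below is illustrative only and is not the helper shipped in nsimd's tests:

    // Illustrative UFP-style comparison (assumed semantics): two arrays
    // match when each pair agrees on roughly the first `ufp` significand
    // bits, i.e. their relative distance stays below 2^(-ufp).
    #include <algorithm>
    #include <cmath>
    #include <cstddef>

    template <typename T>
    bool cmp_ufp(const T *ref, const T *out, size_t n, int ufp) {
      const T eps = std::ldexp(T(1), -ufp); // 2^(-ufp)
      for (size_t i = 0; i < n; i++) {
        T m = std::max(std::fabs(ref[i]), std::fabs(out[i]));
        // absolute floor of eps near zero, relative bound elsewhere
        if (std::fabs(ref[i] - out[i]) > eps * std::max(m, T(1))) {
          return false;
        }
      }
      return true;
    }
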
@@ -707,7 +715,6 @@ class Store3u(Operator): full_name = 'store into array of structures' signature = '_ store3u * v v v' load_store = True - domain = Domain('RxRxR') categories = [DocLoadStore] desc = 'Store 3 SIMD vectors as array of structures of 3 members into ' + \ 'unaligned memory.' @@ -716,7 +723,6 @@ class Store4u(Operator): full_name = 'store into array of structures' signature = '_ store4u * v v v v' load_store = True - domain = Domain('RxRxRxR') categories = [DocLoadStore] desc = 'Store 4 SIMD vectors as array of structures of 4 members into ' + \ 'unaligned memory.' @@ -737,7 +743,6 @@ class Store2a(Operator): full_name = 'store into array of structures' signature = '_ store2a * v v' load_store = True - domain = Domain('RxR') categories = [DocLoadStore] desc = 'Store 2 SIMD vectors as array of structures of 2 members into ' + \ 'aligned memory.' @@ -746,7 +751,6 @@ class Store3a(Operator): full_name = 'store into array of structures' signature = '_ store3a * v v v' load_store = True - domain = Domain('RxRxR') categories = [DocLoadStore] desc = 'Store 3 SIMD vectors as array of structures of 3 members into ' + \ 'aligned memory.' @@ -755,7 +759,6 @@ class Store4a(Operator): full_name = 'store into array of structures' signature = '_ store4a * v v v v' load_store = True - domain = Domain('RxRxRxR') categories = [DocLoadStore] desc = 'Store 4 SIMD vectors as array of structures of 4 members into ' + \ 'aligned memory.' @@ -837,7 +840,6 @@ class Storelu(Operator): signature = '_ storelu * l' load_store = True categories = [DocLoadStore] - domain = Domain('R') desc = 'Store SIMD vector of booleans into unaligned memory. True is ' + \ 'stored as 1 and False as 0.' @@ -846,7 +848,6 @@ class Storela(Operator): signature = '_ storela * l' load_store = True categories = [DocLoadStore] - domain = Domain('R') desc = 'Store SIMD vector of booleans into aligned memory. True is ' + \ 'stored as 1 and False as 0.' 
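
The store2u/store3u/store4u operators touched above all document the same
array-of-structures layout. A scalar model of store2u's documented behavior,
as a sketch only (store2u_model is a hypothetical helper, not nsimd's SIMD
implementation; store3u and store4u extend the pattern to 3 and 4 members):

    // Scalar model of the documented store2u semantics: write two vectors
    // as an array of structures of 2 members, i.e. interleave them.
    void store2u_model(float *dst, const float *a, const float *b, int len) {
      for (int i = 0; i < len; i++) {
        dst[2 * i + 0] = a[i]; // member 0 of structure i
        dst[2 * i + 1] = b[i]; // member 1 of structure i
      }
    }
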
@@ -854,81 +855,61 @@ class Orb(Operator): full_name = 'bitwise or' signature = 'v orb v v' cxx_operator = '|' - domain = Domain('RxR') categories = [DocBitsOperators] - #bench_auto_against_std = True ## TODO: Add check to floating-types - bench_auto_against_mipp = True class Andb(Operator): full_name = 'bitwise and' signature = 'v andb v v' cxx_operator = '&' - domain = Domain('RxR') categories = [DocBitsOperators] - #bench_auto_against_std = True ## TODO: Add check to floating-types - bench_auto_against_mipp = True class Andnotb(Operator): full_name = 'bitwise andnot' signature = 'v andnotb v v' - domain = Domain('RxR') categories = [DocBitsOperators] - bench_auto_against_mipp = True - - def bench_mipp_name(self, typ): - return 'mipp::andnb<{}>'.format(typ) + desc = 'Returns the bitwise andnot of its arguments, more precisely ' \ + '"arg1 and (not arg2)"' class Notb(Operator): full_name = 'bitwise not' signature = 'v notb v' cxx_operator = '~' - domain = Domain('R') categories = [DocBitsOperators] - #bench_auto_against_std = True ## TODO: Add check to floating-types - bench_auto_against_mipp = True class Xorb(Operator): full_name = 'bitwise xor' signature = 'v xorb v v' cxx_operator = '^' - domain = Domain('RxR') categories = [DocBitsOperators] - #bench_auto_against_std = True ## TODO: Add check to floating-types - bench_auto_against_mipp = True class Orl(Operator): full_name = 'logical or' signature = 'l orl l l' cxx_operator = '||' - domain = Domain('BxB') categories = [DocLogicalOperators] - bench_auto_against_std = True class Andl(Operator): full_name = 'logical and' signature = 'l andl l l' cxx_operator = '&&' - domain = Domain('BxB') categories = [DocLogicalOperators] - bench_auto_against_std = True class Andnotl(Operator): full_name = 'logical andnot' signature = 'l andnotl l l' - domain = Domain('BxB') categories = [DocLogicalOperators] + desc = 'Returns the logical andnot of its arguments, more precisely ' \ + '"arg1 and (not arg2)"' class Xorl(Operator): full_name = 'logical xor' signature = 'l xorl l l' - domain = Domain('BxB') categories = [DocLogicalOperators] class Notl(Operator): full_name = 'logical not' signature = 'l notl l' cxx_operator = '!' 
- domain = Domain('B') categories = [DocLogicalOperators] bench_auto_against_std = True @@ -936,7 +917,6 @@ class Add(Operator): full_name = 'addition' signature = 'v add v v' cxx_operator = '+' - domain = Domain('RxR') categories = [DocBasicArithmetic] bench_auto_against_std = True bench_auto_against_mipp = True @@ -945,7 +925,6 @@ class Sub(Operator): full_name = 'subtraction' signature = 'v sub v v' cxx_operator = '-' - domain = Domain('RxR') categories = [DocBasicArithmetic] bench_auto_against_std = True bench_auto_against_mipp = True @@ -953,7 +932,6 @@ class Sub(Operator): class Addv(Operator): full_name = 'horizontal sum' signature = 's addv v' - domain = Domain('R') categories = [DocMisc] desc = 'Returns the sum of all the elements contained in v' do_bench = False @@ -963,70 +941,49 @@ class Mul(Operator): full_name = 'multiplication' signature = 'v mul v v' cxx_operator = '*' - domain = Domain('RxR') categories = [DocBasicArithmetic] - bench_auto_against_std = True - bench_auto_against_mipp = True class Div(Operator): full_name = 'division' signature = 'v div v v' cxx_operator = '/' - domain = Domain('RxR\{0}') + domain = [[-20, 20], [0.5, 20]] categories = [DocBasicArithmetic] - bench_auto_against_std = True - bench_auto_against_mipp = True class Neg(Operator): full_name = 'opposite' signature = 'v neg v' cxx_operator = '-' - domain = Domain('R') categories = [DocBasicArithmetic] - bench_auto_against_std = True class Min(Operator): full_name = 'minimum' signature = 'v min v v' - domain = Domain('RxR') categories = [DocBasicArithmetic] class Max(Operator): full_name = 'maximum' signature = 'v max v v' - domain = Domain('RxR') categories = [DocBasicArithmetic] - bench_auto_against_mipp = True class Shr(Operator): full_name = 'right shift in zeros' signature = 'v shr v p' types = common.iutypes cxx_operator = '>>' - domain = Domain('RxN') categories = [DocBitsOperators] - bench_auto_against_mipp = True - - def bench_mipp_name(self, typ): - return 'mipp::rshift<{}>'.format(typ) class Shl(Operator): full_name = 'left shift' signature = 'v shl v p' types = common.iutypes cxx_operator = '<<' - domain = Domain('RxN') categories = [DocBitsOperators] - bench_auto_against_mipp = True - - def bench_mipp_name(self, typ): - return 'mipp::lshift<{}>'.format(typ) class Shra(Operator): full_name = 'arithmetic right shift' signature = 'v shra v p' types = common.iutypes - domain = Domain('R+xN') categories = [DocBitsOperators] desc = 'Performs a right shift operation with sign extension.' @@ -1034,84 +991,46 @@ class Eq(Operator): full_name = 'compare for equality' signature = 'l eq v v' cxx_operator = '==' - domain = Domain('RxR') categories = [DocComparison] - bench_auto_against_std = True - bench_auto_against_mipp = True - desc = 'Compare the inputs for equality.' - - def bench_mipp_name(self, typ): - return 'mipp::cmpeq<{}>'.format(typ) class Ne(Operator): full_name = 'compare for inequality' signature = 'l ne v v' cxx_operator = '!=' - domain = Domain('RxR') categories = [DocComparison] - bench_auto_against_std = True - bench_auto_against_mipp = True desc = 'Compare the inputs for inequality.' - def bench_mipp_name(self, typ): - return 'mipp::cmpneq<{}>'.format(typ) - class Gt(Operator): full_name = 'compare for greater-than' signature = 'l gt v v' cxx_operator = '>' - domain = Domain('RxR') categories = [DocComparison] - bench_auto_against_std = True - bench_auto_against_mipp = True desc = 'Compare the inputs for greater-than.' 
-    def bench_mipp_name(self, typ):
-        return 'mipp::cmpgt<{}>'.format(typ)
-
 class Ge(Operator):
     full_name = 'compare for greater-or-equal-than'
     signature = 'l ge v v'
     cxx_operator = '>='
-    domain = Domain('RxR')
     categories = [DocComparison]
-    bench_auto_against_std = True
-    bench_auto_against_mipp = True
     desc = 'Compare the inputs for greater-or-equal-than.'

-    def bench_mipp_name(self, typ):
-        return 'mipp::cmpge<{}>'.format(typ)
-
 class Lt(Operator):
     full_name = 'compare for lesser-than'
     signature = 'l lt v v'
     cxx_operator = '<'
-    domain = Domain('RxR')
     categories = [DocComparison]
-    bench_auto_against_std = True
-    bench_auto_against_mipp = True
     desc = 'Compare the inputs for lesser-than.'

-    def bench_mipp_name(self, typ):
-        return 'mipp::cmplt<{}>'.format(typ)
-
 class Le(Operator):
     full_name = 'compare for lesser-or-equal-than'
     signature = 'l le v v'
     cxx_operator = '<='
-    domain = Domain('RxR')
     categories = [DocComparison]
-    bench_auto_against_std = True
-    bench_auto_against_mipp = True
     desc = 'Compare the inputs for lesser-or-equal-than.'

-    def bench_mipp_name(self, typ):
-        return 'mipp::cmple<{}>'.format(typ)
-
 class If_else1(Operator):
     full_name = 'blend'
     signature = 'v if_else1 l v v'
-    domain = Domain('BxRxR')
     categories = [DocMisc]
     desc = 'Blend the inputs using the vector of logicals as a first ' + \
            'argument. Elements of the second input are taken when the ' + \
@@ -1121,93 +1040,70 @@ class If_else1(Operator):

 class Abs(Operator):
     full_name = 'absolute value'
     signature = 'v abs v'
-    domain = Domain('R')
     categories = [DocBasicArithmetic]
-    bench_auto_against_mipp = True
-    bench_auto_against_sleef = True
-    #bench_auto_against_std = True
-
-    def bench_sleef_name(self, simd, typ):
-        return common.sleef_name('fabs', simd, typ)

 class Fma(Operator):
     full_name = 'fused multiply-add'
     signature = 'v fma v v v'
-    domain = Domain('RxRxR')
     categories = [DocBasicArithmetic]
-    tests_ulps = {'f16':'10', 'f32':'22', 'f64':'50'}
     desc = 'Multiplies the first and second inputs and then adds the ' + \
            'third input.'
+    tests_on_integers_only = True

 class Fnma(Operator):
     full_name = 'fused negate-multiply-add'
     signature = 'v fnma v v v'
-    domain = Domain('RxRxR')
     categories = [DocBasicArithmetic]
-    tests_ulps = {'f16':'10', 'f32':'22', 'f64':'50'}
     desc = 'Multiplies the first and second inputs, negates the ' + \
            'intermediate result and then adds the third input.'
+    tests_on_integers_only = True

 class Fms(Operator):
     full_name = 'fused multiply-subtract'
     signature = 'v fms v v v'
-    domain = Domain('RxRxR')
     categories = [DocBasicArithmetic]
-    tests_ulps = {'f16':'10', 'f32':'22', 'f64':'50'}
     desc = 'Subtracts the third input from the product of the first ' + \
            'and second inputs.'
+    tests_on_integers_only = True

 class Fnms(Operator):
     full_name = 'fused negate-multiply-subtract'
     signature = 'v fnms v v v'
-    domain = Domain('RxRxR')
     categories = [DocBasicArithmetic]
-    tests_ulps = {'f16':'10', 'f32':'22', 'f64':'50'}
     desc = 'Multiplies the first and second inputs, negates the ' + \
            'intermediate result and then subtracts the third input ' + \
            'from the intermediate result.'
+    tests_on_integers_only = True

 class Ceil(Operator):
     full_name = 'rounding up to integer value'
     signature = 'v ceil v'
-    domain = Domain('R')
     categories = [DocRounding]
-    bench_auto_against_sleef = True
-    bench_auto_against_std = True

 class Floor(Operator):
     full_name = 'rounding down to integer value'
     signature = 'v floor v'
-    domain = Domain('R')
     categories = [DocRounding]
-    bench_auto_against_sleef = True
-    bench_auto_against_std = True

 class Trunc(Operator):
     full_name = 'rounding towards zero to integer value'
     signature = 'v trunc v'
-    domain = Domain('R')
     categories = [DocRounding]
-    bench_auto_against_sleef = True
-    bench_auto_against_std = True

 class Round_to_even(Operator):
     full_name = 'rounding to nearest integer value, tie to even'
     signature = 'v round_to_even v'
-    domain = Domain('R')
     categories = [DocRounding]

 class All(Operator):
     full_name = 'check all elements'
     signature = 'p all l'
-    domain = Domain('B')
     categories = [DocMisc]
     desc = 'Return true if and only if all elements of the input are true.'

 class Any(Operator):
     full_name = 'check for one true element'
     signature = 'p any l'
-    domain = Domain('B')
     categories = [DocMisc]
     desc = 'Return true if and only if at least one element of the input ' + \
            'is true.'
@@ -1215,7 +1111,6 @@ class Any(Operator):

 class Nbtrue(Operator):
     full_name = 'count true elements'
     signature = 'p nbtrue l'
-    domain = Domain('B')
     categories = [DocMisc]
     desc = 'Return the number of true elements in the input.'

@@ -1223,22 +1118,16 @@ class Reinterpret(Operator):
     full_name = 'reinterpret vector'
     signature = 'v reinterpret v'
     output_to = common.OUTPUT_TO_SAME_SIZE_TYPES
-    domain = Domain('R')
     categories = [DocConversion]
-    ## Disable bench
-    do_bench = False
     desc = 'Reinterpret input vector into a different vector type ' + \
            'preserving all bits.'

 class Reinterpretl(Operator):
     full_name = 'reinterpret vector of logicals'
     signature = 'l reinterpretl l'
-    domain = Domain('B')
     categories = [DocConversion]
     output_to = common.OUTPUT_TO_SAME_SIZE_TYPES
     has_scalar_impl = False
-    ## Disable bench
-    do_bench = False
     desc = 'Reinterpret input vector of logicals into a different vector ' + \
            'type of logicals preserving all element values. The output ' + \
            'type must have the same length as the input type.'

@@ -1247,42 +1136,33 @@ class Cvt(Operator):
     full_name = 'convert vector'
     signature = 'v cvt v'
     output_to = common.OUTPUT_TO_SAME_SIZE_TYPES
-    domain = Domain('R')
     categories = [DocConversion]
     desc = 'Convert input vector into a different vector type. The output ' + \
            'type must have the same length as the input type.'
-    ## Disable bench
-    do_bench = False

 class Upcvt(Operator):
     full_name = 'convert vector to larger type'
     signature = 'vx2 upcvt v'
     output_to = common.OUTPUT_TO_UP_TYPES
-    domain = Domain('R')
     types = ['i8', 'u8', 'i16', 'u16', 'f16', 'i32', 'u32', 'f32']
     categories = [DocConversion]
     desc = 'Convert input vector into a larger vector type. The ' + \
            'output type must be twice as large as the input type.'
-    ## Disable bench
-    do_bench = False

 class Downcvt(Operator):
     full_name = 'convert vector to narrower type'
     signature = 'v downcvt v v'
     output_to = common.OUTPUT_TO_DOWN_TYPES
-    domain = Domain('R')
     types = ['i16', 'u16', 'f16', 'i32', 'u32', 'f32', 'i64', 'u64', 'f64']
     categories = [DocConversion]
     desc = 'Convert input vector into a narrower vector type. The ' + \
            'output type must be half the size of the input type.'
- ## Disable bench - do_bench = False class Rec(Operator): full_name = 'reciprocal' signature = 'v rec v' types = common.ftypes - domain = Domain('R\{0}') + domain = [[-20, -0.5, 0.5, 20]] categories = [DocBasicArithmetic] class Rec11(Operator): @@ -1290,51 +1170,45 @@ class Rec11(Operator): signature = 'v rec11 v' types = common.ftypes categories = [DocBasicArithmetic] - domain = Domain('R\{0}') - tests_ulps = common.ulps_from_relative_distance_power(11) + domain = [[-20, -0.5, 0.5, 20]] + ufp = { 'f16': 10, 'f32': 10, 'f64': 10 } class Rec8(Operator): - full_name = 'reciprocal with relative error at most 2^{-8}' + full_name = 'reciprocal with relative error at most $2^{-8}$' signature = 'v rec8 v' types = common.ftypes categories = [DocBasicArithmetic] - domain = Domain('R\{0}') - tests_ulps = common.ulps_from_relative_distance_power(8) + domain = [[-20, -0.5, 0.5, 20]] + ufp = { 'f16': 7, 'f32': 7, 'f64': 7 } class Sqrt(Operator): full_name = 'square root' signature = 'v sqrt v' types = common.ftypes - domain = Domain('[0,Inf)') + domain = [[0, 20]] categories = [DocBasicArithmetic] - bench_auto_against_mipp = True - bench_auto_against_sleef = True - bench_auto_against_std = True - tests_mpfr = True class Rsqrt11(Operator): full_name = 'square root with relative error at most $2^{-11}$' signature = 'v rsqrt11 v' types = common.ftypes - domain = Domain('[0,Inf)') + domain = [[0.5, 20]] + ufp = { 'f16': 10, 'f32': 10, 'f64': 10 } categories = [DocBasicArithmetic] - tests_ulps = common.ulps_from_relative_distance_power(11) class Rsqrt8(Operator): full_name = 'square root with relative error at most $2^{-8}$' signature = 'v rsqrt8 v' types = common.ftypes - domain = Domain('[0,Inf)') + domain = [[0.5, 20]] + ufp = { 'f16': 7, 'f32': 7, 'f64': 7 } categories = [DocBasicArithmetic] - tests_ulps = common.ulps_from_relative_distance_power(8) class Ziplo(Operator): full_name = 'zip low halves' signature = 'v ziplo v v' types = common.types - domain = Domain('R') categories = [DocShuffle] - do_bench = False desc = 'Construct a vector where elements of the first low half input ' + \ 'are followed by the corresponding element of the second low ' + \ 'half input.' @@ -1343,9 +1217,7 @@ class Ziphi(Operator): full_name = 'zip high halves' signature = 'v ziphi v v' types = common.types - domain = Domain('R') categories = [DocShuffle] - do_bench = False desc = 'Construct a vector where elements of the first high half ' + \ 'input are followed by the corresponding element of the second ' + \ 'high half input.' @@ -1354,39 +1226,30 @@ class Unziplo(Operator): full_name = 'unziplo' signature = 'v unziplo v v' types = common.types - domain = Domain('R') categories = [DocShuffle] - do_bench = False class Unziphi(Operator): full_name = 'unziphi' signature = 'v unziphi v v' types = common.types - domain = Domain('R') categories = [DocShuffle] - do_bench = False class Zip(Operator): full_name = 'zip' signature = 'vx2 zip v v' types = common.types - domain = Domain('R') categories = [DocShuffle] - do_bench = False class Unzip(Operator): full_name = 'unzip' signature = 'vx2 unzip v v' types = common.types - fomain = Domain('R') categories = [DocShuffle] - do_bench = False class ToMask(Operator): full_name = 'build mask from logicals' signature = 'v to_mask l' categories = [DocLogicalOperators] - do_bench = False desc = 'Returns a mask consisting of all ones for true elements and ' + \ 'all zeros for false elements.' 
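
The list-based domains used above read naturally as unions of intervals:
[[-20, -0.5, 0.5, 20]] for rec keeps test inputs away from zero, while
[[0.5, 20]] for rsqrt keeps them positive. A hypothetical sampler under that
reading (the endpoint-pair encoding is assumed; the actual test generators
may draw values differently):

    // Hypothetical sampler: a flat list of 2*k endpoints, read pairwise,
    // describes k intervals; {-20, -0.5, 0.5, 20} = [-20, -0.5] U [0.5, 20].
    #include <cstdlib>
    #include <vector>

    double sample_domain(const std::vector<double> &bounds) {
      size_t k = bounds.size() / 2;       // number of intervals
      size_t j = (size_t)std::rand() % k; // pick one interval
      double lo = bounds[2 * j], hi = bounds[2 * j + 1];
      return lo + (hi - lo) * ((double)std::rand() / RAND_MAX);
    }
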
@@ -1394,7 +1257,6 @@ class ToLogical(Operator):
     full_name = 'build logicals from data'
     signature = 'l to_logical v'
     categories = [DocLogicalOperators]
-    do_bench = False
     desc = 'Returns a vector of logicals. Set to true when the ' + \
            'corresponding elements are non zero (at least one bit set ' + \
            'to 1) and false otherwise.'
@@ -1403,7 +1265,6 @@ class Iota(Operator):
     full_name = 'fill vector with increasing values'
     signature = 'v iota'
     categories = [DocMisc]
-    do_bench = False
     desc = 'Returns a vector whose first element is zero, the second is ' \
            'one and so on.'
@@ -1411,7 +1272,6 @@ class MaskForLoopTail(Operator):
     full_name = 'build mask for ending loops'
     signature = 'l mask_for_loop_tail p p'
     categories = [DocMisc]
-    do_bench = False
     desc = 'Returns a mask for loading/storing data at loop tails by ' \
            'setting the first elements to True and the last to False. ' \
            'The first argument is the index in a loop whose number of elements ' \
@@ -1420,30 +1280,467 @@ class MaskForLoopTail(Operator):

 class Adds(Operator):
     full_name = 'addition using saturation'
     signature = 'v adds v v'
-    domain = Domain('RxR')
     categories = [DocBasicArithmetic]
     desc = 'Returns the saturated sum of the two vectors given as arguments'

 class Subs(Operator):
     full_name = 'subtraction using saturation'
     signature = 'v subs v v'
-    domain = Domain('RxR')
     categories = [DocBasicArithmetic]
-    desc = 'Returns the saturated subtraction of the two vectors given as arguments'
+    desc = 'Returns the saturated subtraction of the two vectors given as ' \
+           'arguments'
+
+class Sin_u35(SrcOperator):
+    full_name = 'sine'
+    signature = 'v sin_u35 v'
+    sleef_symbol_prefix = 'nsimd_sleef_sin_u35'
+    categories = [DocTrigo]
+    desc = 'Compute the sine of its argument with a precision of 3.5 ulps. ' \
+           'For more information visit <https://sleef.org/>.'
+
+class Cos_u35(SrcOperator):
+    full_name = 'cosine'
+    signature = 'v cos_u35 v'
+    sleef_symbol_prefix = 'nsimd_sleef_cos_u35'
+    categories = [DocTrigo]
+    desc = 'Compute the cosine of its argument with a precision of ' \
+           '3.5 ulps. ' \
+           'For more information visit <https://sleef.org/>.'
+
+class Tan_u35(SrcOperator):
+    full_name = 'tangent'
+    signature = 'v tan_u35 v'
+    sleef_symbol_prefix = 'nsimd_sleef_tan_u35'
+    domain = [[-4.7, -1.6, -1.5, 1.5, 1.6, 4.7]]
+    categories = [DocTrigo]
+    desc = 'Compute the tangent of its argument with a precision of ' \
+           '3.5 ulps. ' \
+           'For more information visit <https://sleef.org/>.'
+
+class Asin_u35(SrcOperator):
+    full_name = 'arcsine'
+    signature = 'v asin_u35 v'
+    sleef_symbol_prefix = 'nsimd_sleef_asin_u35'
+    domain = [[-0.9, 0.9]]
+    categories = [DocTrigo]
+    desc = 'Compute the arcsine of its argument with a precision of ' \
+           '3.5 ulps. ' \
+           'For more information visit <https://sleef.org/>.'
+
+class Acos_u35(SrcOperator):
+    full_name = 'arccosine'
+    signature = 'v acos_u35 v'
+    sleef_symbol_prefix = 'nsimd_sleef_acos_u35'
+    domain = [[-0.9, 0.9]]
+    categories = [DocTrigo]
+    desc = 'Compute the arccosine of its argument with a ' \
+           'precision of 3.5 ulps. ' \
+           'For more information visit <https://sleef.org/>.'
+
+class Atan_u35(SrcOperator):
+    full_name = 'arctangent'
+    signature = 'v atan_u35 v'
+    sleef_symbol_prefix = 'nsimd_sleef_atan_u35'
+    categories = [DocTrigo]
+    desc = 'Compute the arctangent of its argument with a ' \
+           'precision of 3.5 ulps. ' \
+           'For more information visit <https://sleef.org/>.'
+ +class Atan2_u35(SrcOperator): + full_name = 'arctangent of two variables' + signature = 'v atan2_u35 v v' + sleef_symbol_prefix = 'nsimd_sleef_atan2_u35' + domain = [[-20, 20], [-20, -0.5, 0.5, 20]] + categories = [DocTrigo] + desc = 'Compute the arctangent of the quotient of its two arguments ' \ + 'with a precision of 3.5 ulps. ' \ + 'For more information visit <https://sleef.org/>.' + +class Log_u35(SrcOperator): + full_name = 'natural logarithm' + signature = 'v log_u35 v' + sleef_symbol_prefix = 'nsimd_sleef_log_u35' + domain = [[0.5, 20]] + categories = [DocExpLog] + desc = 'Compute the natural logarithm of its argument with a ' \ + 'precision of 3.5 ulps. ' \ + 'For more information visit <https://sleef.org/>.' + +class Cbrt_u35(SrcOperator): + full_name = 'cube root' + signature = 'v cbrt_u35 v' + sleef_symbol_prefix = 'nsimd_sleef_cbrt_u35' + categories = [DocBasicArithmetic] + desc = 'Compute the cube root of its argument with a precision of ' \ + '3.5 ulps. ' \ + 'For more information visit <https://sleef.org/>.' + +class Sin_u10(SrcOperator): + full_name = 'sine' + signature = 'v sin_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_sin_u10' + categories = [DocTrigo] + desc = 'Compute the sine of its argument with a precision of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Cos_u10(SrcOperator): + full_name = 'cosine' + signature = 'v cos_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_cos_u10' + categories = [DocTrigo] + desc = 'Compute the cosine of its argument with a precision of ' \ + '1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Tan_u10(SrcOperator): + full_name = 'tangent' + signature = 'v tan_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_tan_u10' + domain = [[-4.7, -1.6, -1.5, 1.5, 1.6, 4.7]] + categories = [DocTrigo] + desc = 'Compute the tangent of its argument with a precision of ' \ + '1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Asin_u10(SrcOperator): + full_name = 'arcsine' + signature = 'v asin_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_asin_u10' + domain = [[-0.9, 0.9]] + categories = [DocTrigo] + desc = 'Compute the arcsine of its argument with a precision of ' \ + '1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Acos_u10(SrcOperator): + full_name = 'arccosine' + signature = 'v acos_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_acos_u10' + domain = [[-0.9, 0.9]] + categories = [DocTrigo] + desc = 'Compute the arccosine of its argument with a precision of ' \ + '1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Atan_u10(SrcOperator): + full_name = 'arctangent' + signature = 'v atan_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_atan_u10' + categories = [DocTrigo] + desc = 'Compute the arctangent of its argument with a precision of ' \ + '1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Atan2_u10(SrcOperator): + full_name = 'arctangent of two variables' + signature = 'v atan2_u10 v v' + sleef_symbol_prefix = 'nsimd_sleef_atan2_u10' + domain = [[-20, 20], [-20, -0.5, 0.5, 20]] + categories = [DocTrigo] + desc = 'Compute the arctangent of the quotient of its two arguments ' \ + 'with a precision of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Log_u10(SrcOperator): + full_name = 'natural logarithm' + signature = 'v log_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_log_u10' + domain = [[0.5, 20]] + categories = [DocExpLog] + desc = 'Compute the natural logarithm of its argument with a ' \ + 'precision of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.'
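The list-based `domain` values introduced throughout this hunk (for example `[[-20, 20], [-20, -0.5, 0.5, 20]]` on `atan2_u10`) read as one flat breakpoint list per argument, with consecutive pairs delimiting the intervals test inputs are drawn from; that is how `tan_u10` avoids its poles near ±pi/2 and `rec` avoids 0. A sketch of a sampler under that assumption (`sample_domain` is hypothetical, not part of the patch):

```python
import random

def sample_domain(domain, n=4):
    # One breakpoint list per argument; pairs (b[0], b[1]), (b[2], b[3]),
    # ... are the closed intervals values may be drawn from, e.g.
    # [-20, -0.5, 0.5, 20] -> [-20, -0.5] U [0.5, 20].
    samples = []
    for bps in domain:
        intervals = [(bps[i], bps[i + 1]) for i in range(0, len(bps), 2)]
        samples.append([random.uniform(*random.choice(intervals))
                        for _ in range(n)])
    return samples

print(sample_domain([[-20, 20], [-20, -0.5, 0.5, 20]]))  # atan2-style domain
```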
+ +class Cbrt_u10(SrcOperator): + full_name = 'cube root' + signature = 'v cbrt_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_cbrt_u10' + categories = [DocBasicArithmetic] + desc = 'Compute the cube root of its argument with a precision of ' \ + '1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Exp_u10(SrcOperator): + full_name = 'base-e exponential' + signature = 'v exp_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_exp_u10' + domain = [[-20, 5]] + categories = [DocExpLog] + desc = 'Compute the base-e exponential of its argument with a ' \ + 'precision of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Pow_u10(SrcOperator): + full_name = 'power' + signature = 'v pow_u10 v v' + sleef_symbol_prefix = 'nsimd_sleef_pow_u10' + domain = [[0, 5], [-5, 5]] + categories = [DocExpLog] + desc = 'Compute the first argument raised to the power of the ' \ + 'second with a precision of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Sinh_u10(SrcOperator): + full_name = 'hyperbolic sine' + signature = 'v sinh_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_sinh_u10' + categories = [DocHyper] + desc = 'Compute the hyperbolic sine of its argument with a ' \ + 'precision of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Cosh_u10(SrcOperator): + full_name = 'hyperbolic cosine' + signature = 'v cosh_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_cosh_u10' + categories = [DocHyper] + desc = 'Compute the hyperbolic cosine of its argument with a ' \ + 'precision of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Tanh_u10(SrcOperator): + full_name = 'hyperbolic tangent' + signature = 'v tanh_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_tanh_u10' + categories = [DocHyper] + desc = 'Compute the hyperbolic tangent of its argument with a ' \ + 'precision of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Sinh_u35(SrcOperator): + full_name = 'hyperbolic sine' + signature = 'v sinh_u35 v' + sleef_symbol_prefix = 'nsimd_sleef_sinh_u35' + categories = [DocHyper] + desc = 'Compute the hyperbolic sine of its argument with a ' \ + 'precision of 3.5 ulps. ' \ + 'For more information visit <https://sleef.org/>.' + +class Cosh_u35(SrcOperator): + full_name = 'hyperbolic cosine' + signature = 'v cosh_u35 v' + sleef_symbol_prefix = 'nsimd_sleef_cosh_u35' + categories = [DocHyper] + desc = 'Compute the hyperbolic cosine of its argument with a ' \ + 'precision of 3.5 ulps. ' \ + 'For more information visit <https://sleef.org/>.' + +class Tanh_u35(SrcOperator): + full_name = 'hyperbolic tangent' + signature = 'v tanh_u35 v' + sleef_symbol_prefix = 'nsimd_sleef_tanh_u35' + categories = [DocHyper] + desc = 'Compute the hyperbolic tangent of its argument with a ' \ + 'precision of 3.5 ulps. ' \ + 'For more information visit <https://sleef.org/>.' + +class Asinh_u10(SrcOperator): + full_name = 'inverse hyperbolic sine' + signature = 'v asinh_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_asinh_u10' + categories = [DocHyper] + desc = 'Compute the inverse hyperbolic sine of its argument with a ' \ + 'precision of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Acosh_u10(SrcOperator): + full_name = 'inverse hyperbolic cosine' + signature = 'v acosh_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_acosh_u10' + categories = [DocHyper] + domain = [[1, 20]] + desc = 'Compute the inverse hyperbolic cosine of its argument with a ' \ + 'precision of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.'
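The `_u05`/`_u10`/`_u15`/`_u35` suffixes are SLEEF's accuracy tiers: the guaranteed error bound in ulps (units in the last place). One way to measure an ulp distance for f64, as a self-contained sketch (the helper name is mine, not patch code):

```python
import math
import struct

def ulp_distance(a, b):
    # For finite, same-sign doubles the IEEE-754 bit patterns are
    # monotonically ordered, so the integer difference of the bit
    # patterns counts the representable values between a and b.
    ia = struct.unpack('<q', struct.pack('<d', a))[0]
    ib = struct.unpack('<q', struct.pack('<d', b))[0]
    return abs(ia - ib)

x = math.exp(1.25)
print(ulp_distance(x, x * (1.0 + 2.0 ** -51)))  # a few ulps apart
```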
+ +class Atanh_u10(SrcOperator): + full_name = 'inverse hyperbolic tangent' + signature = 'v atanh_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_atanh_u10' + domain = [[-0.9, 0.9]] + categories = [DocHyper] + desc = 'Compute the inverse hyperbolic tangent of its argument with a ' \ + 'precision of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Exp2_u10(SrcOperator): + full_name = 'base-2 exponential' + signature = 'v exp2_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_exp2_u10' + domain = [[-20, 5]] + categories = [DocExpLog] + desc = 'Compute the base-2 exponential of its argument with a ' \ + 'precision of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Exp2_u35(SrcOperator): + full_name = 'base-2 exponential' + signature = 'v exp2_u35 v' + sleef_symbol_prefix = 'nsimd_sleef_exp2_u35' + domain = [[-20, 5]] + categories = [DocExpLog] + desc = 'Compute the base-2 exponential of its argument with a ' \ + 'precision of 3.5 ulps. ' \ + 'For more information visit <https://sleef.org/>.' + +class Exp10_u10(SrcOperator): + full_name = 'base-10 exponential' + signature = 'v exp10_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_exp10_u10' + domain = [[-5, 3]] + categories = [DocExpLog] + desc = 'Compute the base-10 exponential of its argument with a ' \ + 'precision of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Exp10_u35(SrcOperator): + full_name = 'base-10 exponential' + signature = 'v exp10_u35 v' + sleef_symbol_prefix = 'nsimd_sleef_exp10_u35' + domain = [[-5, 3]] + categories = [DocExpLog] + desc = 'Compute the base-10 exponential of its argument with a ' \ + 'precision of 3.5 ulps. ' \ + 'For more information visit <https://sleef.org/>.' + +class Expm1_u10(SrcOperator): + full_name = 'exponential minus 1' + signature = 'v expm1_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_expm1_u10' + domain = [[-5, 3]] + categories = [DocExpLog] + desc = 'Compute the base-e exponential of its argument minus 1 with a ' \ + 'precision of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Log10_u10(SrcOperator): + full_name = 'base-10 logarithm' + signature = 'v log10_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_log10_u10' + domain = [[0.5, 20]] + categories = [DocExpLog] + desc = 'Compute the base-10 logarithm of its argument with a precision ' \ + 'of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Log2_u10(SrcOperator): + full_name = 'base-2 logarithm' + signature = 'v log2_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_log2_u10' + domain = [[0.5, 20]] + categories = [DocExpLog] + desc = 'Compute the base-2 logarithm of its argument with a precision ' \ + 'of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Log2_u35(SrcOperator): + full_name = 'base-2 logarithm' + signature = 'v log2_u35 v' + sleef_symbol_prefix = 'nsimd_sleef_log2_u35' + domain = [[0.5, 20]] + categories = [DocExpLog] + desc = 'Compute the base-2 logarithm of its argument with a ' \ + 'precision of 3.5 ulps. ' \ + 'For more information visit <https://sleef.org/>.' + +class Log1p_u10(SrcOperator): + full_name = 'logarithm of 1 plus argument' + signature = 'v log1p_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_log1p_u10' + domain = [[-0.5, 19]] + categories = [DocExpLog] + desc = 'Compute the natural logarithm of 1 plus its argument with ' \ + 'a precision of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.'
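A quick motivation for keeping `expm1_u10` and `log1p_u10` as first-class operators above rather than composing `exp`/`log`: near zero the naive forms cancel catastrophically. Plain-Python demonstration (not patch code):

```python
import math

x = 1e-12
# exp(x) rounds to a double barely above 1.0; subtracting 1.0 then
# promotes that rounding error into the leading digits.
naive = math.exp(x) - 1.0
print(naive)             # 1.000088900582341e-12: wrong after 4 digits
print(math.expm1(x))     # 1.0000000000005e-12: correctly rounded
print(math.log1p(x))     # avoids the same cancellation in log(1 + x)
```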
+ +class Sinpi_u05(SrcOperator): + full_name = 'sine of pi times argument' + signature = 'v sinpi_u05 v' + sleef_symbol_prefix = 'nsimd_sleef_sinpi_u05' + categories = [DocTrigo] + desc = 'Compute the sine of pi times its argument with a ' \ + 'precision of 0.5 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Cospi_u05(SrcOperator): + full_name = 'cosine of pi times argument' + signature = 'v cospi_u05 v' + sleef_symbol_prefix = 'nsimd_sleef_cospi_u05' + categories = [DocTrigo] + desc = 'Compute the cosine of pi times its argument with ' \ + 'a precision of 0.5 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Hypot_u05(SrcOperator): + full_name = 'Euclidean distance' + signature = 'v hypot_u05 v v' + sleef_symbol_prefix = 'nsimd_sleef_hypot_u05' + categories = [DocBasicArithmetic] + desc = 'Compute the Euclidean distance of its two arguments with a ' \ + 'precision of 0.5 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Hypot_u35(SrcOperator): + full_name = 'Euclidean distance' + signature = 'v hypot_u35 v v' + sleef_symbol_prefix = 'nsimd_sleef_hypot_u35' + categories = [DocBasicArithmetic] + desc = 'Compute the Euclidean distance of its two arguments with a ' \ + 'precision of 3.5 ulps. ' \ + 'For more information visit <https://sleef.org/>.' + +class Remainder(SrcOperator): + full_name = 'floating-point remainder' + signature = 'v remainder v v' + sleef_symbol_prefix = 'nsimd_sleef_remainder' + domain = [[1, 20], [1, 20]] + categories = [DocBasicArithmetic] + desc = 'Compute the floating-point remainder of its arguments. ' \ + 'For more information visit <https://sleef.org/>.' + +class Fmod(SrcOperator): + full_name = 'floating-point remainder' + signature = 'v fmod v v' + sleef_symbol_prefix = 'nsimd_sleef_fmod' + domain = [[1, 20], [1, 20]] + categories = [DocBasicArithmetic] + desc = 'Compute the floating-point remainder of its arguments. ' \ + 'For more information visit <https://sleef.org/>.' + +class Lgamma_u10(SrcOperator): + full_name = 'log gamma' + signature = 'v lgamma_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_lgamma_u10' + domain = [[0.5, 20]] + categories = [DocExpLog] + desc = 'Compute the log gamma of its argument with a precision of ' \ + '1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Tgamma_u10(SrcOperator): + full_name = 'true gamma' + signature = 'v tgamma_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_tgamma_u10' + domain = [[0.5, 5]] + categories = [DocExpLog] + desc = 'Compute the true gamma of its argument with a precision of ' \ + '1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Erf_u10(SrcOperator): + full_name = 'error function' + signature = 'v erf_u10 v' + sleef_symbol_prefix = 'nsimd_sleef_erf_u10' + categories = [DocExpLog] + desc = 'Compute the error function of its argument with a ' \ + 'precision of 1.0 ulp. ' \ + 'For more information visit <https://sleef.org/>.' + +class Erfc_u15(SrcOperator): + full_name = 'complementary error function' + signature = 'v erfc_u15 v' + sleef_symbol_prefix = 'nsimd_sleef_erfc_u15' + categories = [DocExpLog] + desc = 'Compute the complementary error function of its argument with a ' \ + 'precision of 1.5 ulps. ' \ + 'For more information visit <https://sleef.org/>.' -# ----------------------------------------------------------------------------- -# Import other operators if present: this is not very Pythonic but it is -# simple and it works!
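Likewise `hypot_u05`/`hypot_u35` above are more than `sqrt(a*a + b*b)`: a real hypot stays finite where the intermediate squares overflow. Plain-Python demonstration (not patch code):

```python
import math

x = 1e200
print(math.sqrt(x * x + x * x))  # inf: x * x already overflows f64
print(math.hypot(x, x))          # 1.4142135623730951e+200
```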
- -import os -import sys -import io - -sep = ';' if sys.platform == 'win32' else ':' -search_dirs = os.getenv('NSIMD_OPERATORS_PATH') -if search_dirs != None: - dirs = search_dirs.split(sep) - for d in dirs: - operators_file = os.path.join(d, 'operators.py') - with io.open(operators_file, mode='r', encoding='utf-8') as fin: - exec(fin.read()) diff --git a/egg/platform_arm.py b/egg/platform_arm.py index d9399155..2b626e42 100644 --- a/egg/platform_arm.py +++ b/egg/platform_arm.py @@ -92,6 +92,13 @@ def get_simd_exts(): return ['neon128', 'aarch64', 'sve', 'sve128', 'sve256', 'sve512', 'sve1024', 'sve2048'] +def get_prev_simd_ext(simd_ext): + if simd_ext in ['neon128', 'aarch64']: + return 'cpu' + elif simd_ext in sve: + return 'aarch64' + raise ValueError('Unknown SIMD extension "{}"'.format(simd_ext)) + def emulate_fp16(simd_ext): if not simd_ext in get_simd_exts(): raise ValueError('Unknown SIMD extension "{}"'.format(simd_ext)) @@ -757,7 +764,7 @@ def addsub(op, simd_ext, typ): return 'return v{op}q_{suf}({in0}, {in1});'. \ format(op=op, **fmtspec) else: - return 'return sv{op}_{suf}_z({svtrue}, {in0}, {in1});'. \ + return 'return sv{op}_{suf}_x({svtrue}, {in0}, {in1});'. \ format(op=op, **fmtspec) # ----------------------------------------------------------------------------- @@ -773,7 +780,7 @@ def mul2(simd_ext, typ): if simd_ext in neon: return 'return vmulq_{suf}({in0}, {in1});'.format(**fmtspec) else: - return 'return svmul_{suf}_z({svtrue}, {in0}, {in1});'. \ + return 'return svmul_{suf}_x({svtrue}, {in0}, {in1});'. \ format(**fmtspec) # ----------------------------------------------------------------------------- @@ -784,7 +791,7 @@ def div2(simd_ext, typ): return 'return vdivq_{suf}({in0}, {in1});'.format(**fmtspec) elif simd_ext in sve and \ typ in ['f16', 'f32', 'f64', 'i32', 'u32', 'i64', 'u64']: - return 'return svdiv_{suf}_z({svtrue}, {in0}, {in1});'. \ + return 'return svdiv_{suf}_x({svtrue}, {in0}, {in1});'. \ format(**fmtspec) else: ret = f16f64(simd_ext, typ, 'div', 'div', 2) @@ -802,7 +809,7 @@ def binop2(op, simd_ext, typ): return 'return v{armop}q_{suf}({in0}, {in1});'. \ format(armop=armop[op], **fmtspec) else: - return 'return sv{armop}_{suf}_z({svtrue}, {in0}, {in1});'. \ + return 'return sv{armop}_{suf}_x({svtrue}, {in0}, {in1});'. \ format(armop=armop[op], **fmtspec) # From here only float types if typ == 'f16': @@ -824,7 +831,7 @@ def binop2(op, simd_ext, typ): else: return \ '''return svreinterpret_f{typnbits}_u{typnbits}( - sv{armop}_u{typnbits}_z({svtrue}, + sv{armop}_u{typnbits}_x({svtrue}, svreinterpret_u{typnbits}_f{typnbits}({in0}), svreinterpret_u{typnbits}_f{typnbits}({in1})));'''. \ format(armop=armop[op], **fmtspec) @@ -843,7 +850,7 @@ def not1(simd_ext, typ): vreinterpretq_u32_{suf}({in0})));'''. \ format(**fmtspec) if simd_ext in sve: - return 'return svnot_{suf}_z({svtrue}, {in0});'.format(**fmtspec) + return 'return svnot_{suf}_x({svtrue}, {in0});'.format(**fmtspec) # From here only float types if typ == 'f16': intrinsics = \ @@ -861,7 +868,7 @@ def not1(simd_ext, typ): format(**fmtspec) else: return \ - '''return svreinterpret_{suf}_u{typnbits}(svnot_u{typnbits}_z( + '''return svreinterpret_{suf}_u{typnbits}(svnot_u{typnbits}_x( {svtrue}, svreinterpret_u{typnbits}_{suf}({in0})));'''. 
\ format(**fmtspec) @@ -901,7 +908,7 @@ def lop2(opts, op, simd_ext, typ): # TODO: the casts are a workaround to avoid a bug in gcc trunk for sve # it needs to be deleted when the bug is corrected return \ - '''return sv{armop}_z({svtrue}, + '''return sv{armop}_x({svtrue}, (svuint{typnbits}_t){in0}, (svuint{typnbits}_t){in1});'''. \ format(armop=armop[op], **fmtspec) @@ -963,7 +970,7 @@ def sqrt1(simd_ext, typ): else: return 'return vsqrtq_{suf}({in0});'.format(**fmtspec) else: - return 'return svsqrt_{suf}_z({svtrue}, {in0});'.format(**fmtspec) + return 'return svsqrt_{suf}_x({svtrue}, {in0});'.format(**fmtspec) # ----------------------------------------------------------------------------- # Shifts @@ -985,7 +992,7 @@ def shl_shr(op, simd_ext, typ): armop = 'lsl' if op == 'shl' else 'lsr' if op == 'shr' and typ in common.itypes: return \ - '''return svreinterpret_{suf}_{suf2}(sv{armop}_{suf2}_z({svtrue}, + '''return svreinterpret_{suf}_{suf2}(sv{armop}_{suf2}_x({svtrue}, svreinterpret_{suf2}_{suf}({in0}), svdup_n_u{typnbits}((u{typnbits}){in1})));'''. \ format(suf2=common.bitfield_type[typ], armop=armop, @@ -1195,7 +1202,7 @@ def minmax2(op, simd_ext, typ): return 'return v{op}q_{suf}({in0}, {in1});'. \ format(op=op, **fmtspec) else: - return 'return sv{op}_{suf}_z({svtrue}, {in0}, {in1});'. \ + return 'return sv{op}_{suf}_x({svtrue}, {in0}, {in1});'. \ format(op=op, **fmtspec) # ----------------------------------------------------------------------------- @@ -1215,7 +1222,7 @@ def abs1(simd_ext, typ): else: return f16f64(simd_ext, 'f64', 'abs', 'abs', 1) else: - return 'return svabs_{suf}_z({svtrue}, {in0});'. \ + return 'return svabs_{suf}_x({svtrue}, {in0});'. \ format(**fmtspec) # ----------------------------------------------------------------------------- @@ -1241,7 +1248,7 @@ def round1(op, simd_ext, typ): else: armop = {'floor': 'rintm', 'ceil': 'rintp', 'trunc': 'rintz', 'round_to_even': 'rintn'} - return 'return sv{armop}_{suf}_z({svtrue}, {in0});'. \ + return 'return sv{armop}_{suf}_x({svtrue}, {in0});'. \ format(armop=armop[op], **fmtspec) # ----------------------------------------------------------------------------- @@ -1280,7 +1287,7 @@ def fmafnma3(op, simd_ext, typ): else: return normal else: - return 'return sv{armop}_{suf}_z({svtrue}, {in2}, {in1}, {in0});'. \ + return 'return sv{armop}_{suf}_x({svtrue}, {in2}, {in1}, {in0});'. \ format(armop=armop[op], **fmtspec) # ----------------------------------------------------------------------------- @@ -1299,7 +1306,7 @@ def fmsfnms3(op, simd_ext, typ): format(op2='fma' if op == 'fms' else 'fnma', **fmtspec) else: armop = {'fnms': 'nmla', 'fms': 'nmls'} - return 'return sv{armop}_{suf}_z({svtrue}, {in2}, {in1}, {in0});'. \ + return 'return sv{armop}_{suf}_x({svtrue}, {in2}, {in1}, {in0});'. \ format(armop=armop[op], **fmtspec) # ----------------------------------------------------------------------------- @@ -1337,11 +1344,11 @@ def neg1(simd_ext, typ): if typ in common.utypes: return \ '''return svreinterpret_{suf}_s{typnbits}( - svneg_s{typnbits}_z({svtrue}, + svneg_s{typnbits}_x({svtrue}, svreinterpret_s{typnbits}_{suf}({in0})));'''. \ format(**fmtspec) else: - return 'return svneg_{suf}_z({svtrue}, {in0});'.format(**fmtspec) + return 'return svneg_{suf}_x({svtrue}, {in0});'.format(**fmtspec) # ----------------------------------------------------------------------------- # Reciprocals @@ -1596,7 +1603,7 @@ def convert1(simd_ext, from_typ, to_typ): return 'return svreinterpret_{to_suf}_{from_suf}({in0});'. 
\ format(**fmtspec2) if simd_ext in sve: - return 'return svcvt_{to_suf}_{from_suf}_z({svtrue}, {in0});'. \ + return 'return svcvt_{to_suf}_{from_suf}_x({svtrue}, {in0});'. \ format(**fmtspec2) to_f16_with_f32 = \ '''nsimd_{simd_ext}_vf16 ret; @@ -1919,9 +1926,9 @@ def upcvt1(simd_ext, from_typ, to_typ): elif from_typ in common.iutypes and to_typ in common.ftypes: return \ '''nsimd_{simd_ext}_v{to_typ}x2 ret; - ret.v0 = svcvt_{suf_to_typ}_{suf_int_typ}_z( + ret.v0 = svcvt_{suf_to_typ}_{suf_int_typ}_x( {svtrue}, svunpklo_{suf_int_typ}({in0})); - ret.v1 = svcvt_{suf_to_typ}_{suf_int_typ}_z( + ret.v1 = svcvt_{suf_to_typ}_{suf_int_typ}_x( {svtrue}, svunpkhi_{suf_int_typ}({in0})); return ret;'''. \ format(suf_to_typ=suf(to_typ), @@ -1929,9 +1936,9 @@ def upcvt1(simd_ext, from_typ, to_typ): else: return \ '''nsimd_{simd_ext}_v{to_typ}x2 ret; - ret.v0 = svcvt_{suf_to_typ}_{suf}_z({svtrue}, svzip1_{suf}( + ret.v0 = svcvt_{suf_to_typ}_{suf}_x({svtrue}, svzip1_{suf}( {in0}, {in0})); - ret.v1 = svcvt_{suf_to_typ}_{suf}_z({svtrue}, svzip2_{suf}( + ret.v1 = svcvt_{suf_to_typ}_{suf}_x({svtrue}, svzip2_{suf}( {in0}, {in0})); return ret;'''.format(suf_to_typ=suf(to_typ), **fmtspec) @@ -1989,16 +1996,16 @@ def downcvt1(simd_ext, from_typ, to_typ): elif from_typ in common.ftypes and to_typ in common.iutypes: return \ '''return svuzp1_{suf_to_typ}(svreinterpret_{suf_to_typ}_{suf_int_typ}( - svcvt_{suf_int_typ}_{suf}_z({svtrue}, {in0})), + svcvt_{suf_int_typ}_{suf}_x({svtrue}, {in0})), svreinterpret_{suf_to_typ}_{suf_int_typ}( - svcvt_{suf_int_typ}_{suf}_z({svtrue}, {in1})));'''. \ + svcvt_{suf_int_typ}_{suf}_x({svtrue}, {in1})));'''. \ format(suf_to_typ=suf(to_typ), suf_int_typ=suf(to_typ[0] + from_typ[1:]), **fmtspec) else: return \ - '''return svuzp1_{suf_to_typ}(svcvt_{suf_to_typ}_{suf}_z( - {svtrue}, {in0}), svcvt_{suf_to_typ}_{suf}_z( + '''return svuzp1_{suf_to_typ}(svcvt_{suf_to_typ}_{suf}_x( + {svtrue}, {in0}), svcvt_{suf_to_typ}_{suf}_x( {svtrue}, {in1}));'''. \ format(suf_to_typ=suf(to_typ), **fmtspec) @@ -2007,25 +2014,22 @@ def downcvt1(simd_ext, from_typ, to_typ): def adds(simd_ext, from_typ): if from_typ in common.ftypes: - return 'return nsimd_add_{simd_ext}_{from_typ}({in0}, {in1});'.format(**fmtspec) - - if simd_ext in neon: - return 'return vqaddq_{suf}({in0}, {in1});'. \ + return 'return nsimd_add_{simd_ext}_{from_typ}({in0}, {in1});'. \ format(**fmtspec) + if simd_ext in neon: + return 'return vqaddq_{suf}({in0}, {in1});'.format(**fmtspec) else: - return 'return svqadd_{suf}({in0}, {in1});'. \ - format(**fmtspec) + return 'return svqadd_{suf}({in0}, {in1});'.format(**fmtspec) # ----------------------------------------------------------------------------- # subs def subs(simd_ext, from_typ): if from_typ in common.ftypes: - return 'return nsimd_sub_{simd_ext}_{from_typ}({in0}, {in1});'.format(**fmtspec) - + return 'return nsimd_sub_{simd_ext}_{from_typ}({in0}, {in1});'. \ + format(**fmtspec) elif simd_ext in neon: return 'return vqsubq_{suf}({in0}, {in1});'.format(**fmtspec) - else: return 'return svqsub_{suf}({in0}, {in1});'.format(**fmtspec) @@ -2064,8 +2068,7 @@ def to_mask1(opts, simd_ext, typ): return '''return svreinterpret_{suf}_{utyp}(svsel_{utyp}( {in0}, svdup_n_{utyp}(({utyp})-1), svdup_n_{utyp}(({utyp})0)));'''. 
\ - format(utyp=utyp, **fmtspec) - + format(utyp=utyp, **fmtspec) else: return normal @@ -2078,7 +2081,7 @@ def iota(simd_ext, typ): return 'return svindex_{suf}(0, 1);'.format(**fmtspec) else: return \ - '''return svcvt_{suf}_s{typnbits}_z({svtrue}, + '''return svcvt_{suf}_s{typnbits}_x({svtrue}, svindex_s{typnbits}(0, 1));'''.format(**fmtspec) if typ == 'f64' and simd_ext == 'neon128': return '''nsimd_neon128_vf64 ret; diff --git a/egg/platform_cpu.py b/egg/platform_cpu.py index 48347447..c94f50bd 100644 --- a/egg/platform_cpu.py +++ b/egg/platform_cpu.py @@ -46,6 +46,11 @@ def get_nb_el(typ): def get_simd_exts(): return ['cpu'] +def get_prev_simd_ext(simd_ext): + if simd_ext != 'cpu': + raise ValueError('Unknown SIMD extension "{}"'.format(simd_ext)) + return '' + def get_simd_strings(simd_ext): if simd_ext == 'cpu': return ['cpu'] @@ -90,7 +95,8 @@ def get_additional_include(func, platform, simd_ext): if func in ['adds', 'subs', 'orb', 'andb', 'andnotb', 'xorb', 'min', 'max' 'notb', 'sqrt', 'shr', 'shl', 'shra', 'abs', 'fma', 'fnma', 'fms', 'fnms', 'ceil', 'floor', 'trunc', 'round_to_even', - 'rec11', 'rec8', 'rsqrt11', 'rsqrt8', 'rec', 'neg']: + 'rec11', 'rec8', 'rsqrt11', 'rsqrt8', 'rec', 'neg', + 'lgamma_u10', 'tgamma_u10', 'erf_u10', 'erfc_u15']: return '''#include ''' elif func == 'zip': diff --git a/egg/platform_ppc.py b/egg/platform_ppc.py new file mode 100644 index 00000000..3e53073b --- /dev/null +++ b/egg/platform_ppc.py @@ -0,0 +1,1966 @@ +# Copyright (c) 2021 Agenium Scale +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# This file gives the implementation for the Power PC platform. +# This script tries to be as readable as possible. It implements VMX and VSX. 
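A note on the emulation strategy of this new file: where VMX lacks 64-bit lanes, the vector is carried as a struct of two scalar lanes `v0`/`v1` (and f16 as two `__vector float`), with operators applied lane by lane. A pure-Python mental model of what the `emulate_64` helper defined below emits (`V64` and `emulate_64_add` are my names, not patch code):

```python
from collections import namedtuple

# Models the generated C struct, e.g. nsimd_vmx_vf64 { f64 v0; f64 v1; }.
V64 = namedtuple('V64', ['v0', 'v1'])

def emulate_64_add(a, b):
    # What emulate_64('add', ...) expands to: call the scalar operator
    # on each lane and rebuild the two-lane struct.
    return V64(a.v0 + b.v0, a.v1 + b.v1)

print(emulate_64_add(V64(1.5, 2.5), V64(10.0, 20.0)))  # V64(v0=11.5, v1=22.5)
```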
+ +# Documentation found from: +# https://www.nxp.com/docs/en/reference-manual/ALTIVECPIM.pdf +# https://www.ibm.com/docs/en/xl-c-and-cpp-linux/13.1.6?topic=functions-vector-built-in +# https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/PowerPC-AltiVec-Built-in-Functions-Available-on-ISA-2_002e06.html + +import common + +fmtspec = {} + +# ----------------------------------------------------------------------------- +# Helpers + +def has_to_be_emulated(simd_ext, typ): + if typ == 'f16': + return True + if simd_ext == 'vmx' and typ in ['f64', 'i64', 'u64']: + return True + return False + +# Returns the power pc type corresponding to the nsimd type +def native_type(typ): + if typ == 'u8': + return '__vector unsigned char' + elif typ == 'i8': + return '__vector signed char' + elif typ == 'u16': + return '__vector unsigned short' + elif typ == 'i16': + return '__vector signed short' + elif typ == 'u32': + return '__vector unsigned int' + elif typ == 'u64': + return '__vector unsigned long long' + elif typ == 'i32': + return '__vector signed int' + elif typ == 'i64': + return '__vector signed long long' + elif typ == 'f32': + return '__vector float' + elif typ == 'f64': + return '__vector double' + else: + raise ValueError('Type "{}" not supported'.format(typ)) + +# Returns the logical power pc type corresponding to the nsimd type +def native_typel(typ): + if typ in ['i8', 'u8']: + return '__vector __bool char' + elif typ in ['i16', 'u16']: + return '__vector __bool short' + elif typ in ['i32', 'u32', 'f32']: + return '__vector __bool int' + elif typ in ['f64', 'i64', 'u64']: + return '__vector __bool long long' + else: + raise ValueError('Type "{}" not supported'.format(typ)) + +# Length of a vector with elements of type typ +def get_len(typ): + return 128 // int(typ[1:]) + +# Emulate 64 bits types for vmx only +def emulate_64(op, typ, params): + def arg(param, i): + if param == 'v': + return '{}.v{{i}}'.format(common.get_arg(i)) + elif param == 'l': + return '(int)({}.v{{i}} & ((u64)1))'.format(common.get_arg(i)) + else: + return common.get_arg(i) + args = ', '.join(arg(params[i + 1], i) for i in range(len(params[1:]))) + args0 = args.format(i=0) + args1 = args.format(i=1) + if params[0] == 'v': + return '''nsimd_vmx_v{typ} ret; + ret.v0 = nsimd_scalar_{op}_{typ}({args0}); + ret.v1 = nsimd_scalar_{op}_{typ}({args1}); + return ret;'''. \ + format(typ=typ, op=op, args0=args0, args1=args1) + else: + return \ + '''nsimd_vmx_vl{typ} ret; + ret.v0 = (u64)(nsimd_scalar_{op}{suf}({args0}) ? -1 : 0); + ret.v1 = (u64)(nsimd_scalar_{op}{suf}({args1}) ? -1 : 0); + return ret;'''. \ + format(suf='' if params == ['l'] * len(params) else '_' + typ, + typ=typ, op=op, args0=args0, args1=args1) + +def emulate_f16(op, simd_ext, params): + tmpl = ', '.join(['{{in{}}}.v{{{{i}}}}'.format(i).format(**fmtspec) \ + for i in range(len(params[1:]))]) + args1 = tmpl.format(i=0) + args2 = tmpl.format(i=1) + l = 'l' if params[0] == 'l' else '' + return '''nsimd_{simd_ext}_v{l}f16 ret; + ret.v0 = nsimd_{op}_{simd_ext}_f32({args1}); + ret.v1 = nsimd_{op}_{simd_ext}_f32({args2}); + return ret;'''. \ + format(l=l, op=op, args1=args1, args2=args2, **fmtspec) + +def emulation_code(op, simd_ext, typ, params): + if typ == 'f16': + return emulate_f16(op, simd_ext, params) + elif simd_ext == 'vmx' and typ in ['f64', 'i64', 'u64']: + return emulate_64(op, typ, params) + else: + raise ValueError('Automatic emulation for {}/{}/{} is not supported'. 
\ + format(op, simd_ext, typ)) + +def emulate_with_scalar(op, simd_ext, typ, params): + def arg(param, i): + if param == 'v': + return 'vec_extract({}, {{i}})'.format(common.get_arg(i)) + elif param == 'l': + return '(int)(vec_extract({}, {{i}}) & ((u{})1))'. \ + format(common.get_arg(i), typ[1:]) + else: + return common.get_arg(i) + args = ', '.join(arg(params[i + 1], i) for i in range(len(params[1:]))) + if params[0] == 'v': + return '''nsimd_{simd_ext}_v{typ} ret; + ret = vec_splats(nsimd_scalar_{op}_{typ}({args0})); + '''.format(typ=typ, op=op, args0=args.format(i=0), + simd_ext=simd_ext) + '\n' + \ + '\n'.join('ret = vec_insert('\ + 'nsimd_scalar_{op}_{typ}({argsi}), ret, {i});'. \ + format(op=op, typ=typ, argsi=args.format(i=i), i=i) \ + for i in range(1, get_len(typ))) + '\nreturn ret;' + else: + utyp = 'u' + typ[1:] + return \ + '''nsimd_{simd_ext}_vl{typ} ret; + ret = ({ppc_typl})vec_splats(({utyp})( + nsimd_scalar_{op}_{typ}({args0}) ? -1 : 0)); + '''.format(typ=typ, op=op, args0=args.format(i=0), utyp=utyp, + ppc_typl=native_typel(typ), simd_ext=simd_ext) + '\n' + \ + '\n'.join( + 'ret = ({ppc_typl})vec_insert(({utyp})(' \ + 'nsimd_scalar_{op}_{typ}({argsi}) ? -1 : 0), ret, {i});'. \ + format(op=op, typ=typ, utyp=utyp, argsi=args.format(i=i), + ppc_typl=native_typel(typ), i=i) \ + for i in range(1, get_len(typ))) + '\nreturn ret;' + +# ----------------------------------------------------------------------------- +# Implementation of mandatory functions for this module + +def emulate_fp16(simd_ext): + return True + +def get_simd_exts(): + return ['vmx', 'vsx'] + +def get_type(opts, simd_ext, typ, nsimd_typ): + if simd_ext not in get_simd_exts(): + raise ValueError('Unknown SIMD extension "{}"'.format(simd_ext)) + if typ not in common.types: + raise ValueError('Unknown type "{}"'.format(typ)) + if typ == 'f16': + struct = 'struct {__vector float v0; __vector float v1;}' + elif simd_ext == 'vmx' and typ in ['i64', 'u64', 'f64']: + struct = 'struct {{ {} v0; {} v1; }}'.format(typ, typ) + else: + struct = native_type(typ) + return 'typedef {} {};'.format(struct, nsimd_typ) + +def get_logical_type(opts, simd_ext, typ, nsimd_typ): + if simd_ext not in get_simd_exts(): + raise ValueError('Unknown SIMD extension "{}"'.format(simd_ext)) + if typ not in common.types: + raise ValueError('Unknown type "{}"'.format(typ)) + if typ == 'f16': + struct = 'struct {__vector __bool int v0; __vector __bool int v1;}' + elif simd_ext == 'vmx' and typ in ['i64', 'u64', 'f64']: + struct = 'struct { u64 v0; u64 v1; }' + else: + struct = native_typel(typ) + return 'typedef {} {};'.format(struct, nsimd_typ) + +def get_nb_registers(simd_ext): + if simd_ext == 'vsx': + return '64' + elif simd_ext == 'vmx': + return '32' + else: + raise ValueError('Unknown SIMD extension "{}"'.format(simd_ext)) + +def has_compatible_SoA_types(simd_ext): + if simd_ext in get_simd_exts(): + return False + else: + raise ValueError('Unknown SIMD extension "{}"'.format(simd_ext)) + +def get_additional_include(func, platform, simd_ext): + ret = '''#include + '''.format(func) + if simd_ext == 'vsx': + ret += '''#include + '''.format(func) + + if func == 'neq': + ret += '''#include + #include + '''.format(simd_ext=simd_ext) + + elif func in ['loadlu', 'loadla']: + ret += '''#include + #include + #include + #include + '''.format(load='load' + func[5], **fmtspec) + + elif func in ['storelu']: + ret += '''#include + #include + '''.format(**fmtspec) + + elif func in ['shr', 'shl']: + ret += '''#include + '''.format(**fmtspec) + + elif 
func == "shra": + ret += '''#include + ''' + + elif func in ['zip', 'unzip']: + ret += '''#include + #include + '''.format(unzip_prefix="" if func == "zip" else "un", + **fmtspec) + + elif func in ['unziplo', 'unziphi']: + ret += '''#include + #include + #include + '''.format(**fmtspec) + + elif func[:5] in ['masko', 'maskz']: + ret += '''#include + ''' + + elif func == 'mask_for_loop_tail': + ret += '''#include + #include + #include + #include + '''.format(simd_ext=simd_ext) + + elif func[:4] == 'load': + ret += ''' + #include + + #define NSIMD_PERMUTE_MASK_64(a, b) \ + {{ (unsigned char)(8 * a), (unsigned char)(8 * a + 1), \ + (unsigned char)(8 * b), (unsigned char)(8 * b + 1) }} + + + #define NSIMD_PERMUTE_MASK_32(a, b, c, d) \ + {{ (unsigned char)(4 * a), (unsigned char)(4 * a + 1), \ + (unsigned char)(4 * a + 2), (unsigned char)(4 * a + 3), \ + (unsigned char)(4 * b), (unsigned char)(4 * b + 1), \ + (unsigned char)(4 * b + 2), (unsigned char)(4 * b + 3), \ + (unsigned char)(4 * c), (unsigned char)(4 * c + 1), \ + (unsigned char)(4 * c + 2), (unsigned char)(4 * c + 3), \ + (unsigned char)(4 * d), (unsigned char)(4 * d + 1), \ + (unsigned char)(4 * d + 2), (unsigned char)(4 * d + 3) }} + + #define NSIMD_PERMUTE_MASK_16(a, b, c, d, e, f, g, h) \ + {{ (unsigned char)(2 * a + 0), (unsigned char)(2 * a + 1), \ + (unsigned char)(2 * b + 0), (unsigned char)(2 * b + 1), \ + (unsigned char)(2 * c + 0), (unsigned char)(2 * c + 1), \ + (unsigned char)(2 * d + 0), (unsigned char)(2 * d + 1), \ + (unsigned char)(2 * e + 0), (unsigned char)(2 * e + 1), \ + (unsigned char)(2 * f + 0), (unsigned char)(2 * f + 1), \ + (unsigned char)(2 * g + 0), (unsigned char)(2 * g + 1), \ + (unsigned char)(2 * h + 0), (unsigned char)(2 * h + 1) }} + + #define NSIMD_PERMUTE_MASK_8(a, b, c, d, e, f, g, h, \ + i, j, k, l, m, n, o, p) \ + {{ (unsigned char)(a), (unsigned char)(b), \ + (unsigned char)(c), (unsigned char)(d), \ + (unsigned char)(e), (unsigned char)(f), \ + (unsigned char)(g), (unsigned char)(h), \ + (unsigned char)(i), (unsigned char)(j), \ + (unsigned char)(k), (unsigned char)(l), \ + (unsigned char)(m), (unsigned char)(n), \ + (unsigned char)(o), (unsigned char)(p) }} + '''.format(**fmtspec) + + return ret + +# ----------------------------------------------------------------------------- + +def printf2(*args0): + """ + debugging purposes + decorate the function with it and when executed on test, it will print the + environnements *args0 are the name of var to printf + """ + to_print = [] + for arg in args0: + if isinstance(arg, str): + to_print.append(arg) + + def decorator(func): + import inspect + + def wrapper(*args, **kwargs): + func_args = inspect.signature(func).bind(*args, **kwargs).arguments + func_args_str = '{} called on {}\\n'. 
\ + format(func.__name__, fmtspec['typ']) + \ + ', "'.join('{} = {!r}'.format(*item) \ + for item in func_args.items()) + ret = '' + if not DEBUG: + return func(*args) + typ = '' + if 'typ' in func_args: + typ = func_args['typ'] + else: + typ = func_args['from_typ'] + ret += 'int k;\n' + if func.__name__ == 'store1234' and typ in ['f64', 'i64', 'u64']: + ret += ''' + printf("element to store: %ld %ld", {in1}{suf0}, + {in1}{suf1}); + printf("\\n"); + '''.format(**fmtspec, **get_suf64(typ)) + elif func.__name__ == 'store1234' and typ[1:] == '32': + ret += ''' + printf("element to store:"); + for (k = 0; k < 4; k++) {{ + printf(" %lx", {in1}[k]); + }} + printf("\\n"); + '''.format(**fmtspec, nbits=get_len(typ)) + #print var passed as parameter on printf2 + for var in to_print: + if ppc_is_vec_type(typ): + ret += ''' + printf("values of {var}:"); + for (k = 0; k < {nbits}; k++) {{ + printf(" %lld", {var}[k]); + }} + printf("\\n"); + '''.format(var=var, **fmtspec, nbits=get_len(typ)) + return ''' + printf("\\n---------------\\n"); + printf("{}.{} ( {} )\\n"); + '''.format(func.__module__, func.__qualname__, + func_args_str) + ret + func(*args) + + return wrapper + + return decorator + + +# ----------------------------------------------------------------------------- +# Loads of degree 1, 2, 3 and 4 +# About unaligned loads/stores for Altivec: +# https://developer.ibm.com/technologies/systems/articles/pa-dalign/ + +def load1234(simd_ext, typ, deg, aligned): + if typ in ['f64', 'i64', 'u64']: + if deg == 1: + if simd_ext == 'vmx': + return '''nsimd_{simd_ext}_v{typ} ret; + ret.v0 = {in0}[0]; + ret.v1 = {in0}[1]; + return ret;'''.format(**fmtspec) + else: + return '''nsimd_{simd_ext}_v{typ} ret; + ret = vec_splats({in0}[0]); + ret = vec_insert({in0}[1], ret, 1); + return ret;'''.format(**fmtspec) + else: + if simd_ext == 'vmx': + return \ + 'nsimd_{simd_ext}_v{typ}x{} ret;\n'.format(deg, **fmtspec) + \ + '\n'.join(['ret.v{i}.v0 = *({in0} + {i});'. \ + format(i=i, **fmtspec) \ + for i in range(0, deg)]) + \ + '\n'.join(['ret.v{i}.v1 = *({in0} + {ipd});'. \ + format(i=i, ipd=i + deg, **fmtspec) \ + for i in range(0, deg)]) + \ + '\nreturn ret;' + else: + return \ + 'nsimd_{simd_ext}_v{typ}x{} ret;\n'.format(deg, **fmtspec) + \ + '\n'.join( + 'ret.v{i} = vec_splats({in0}[{i}]);'.format(i=i, **fmtspec) \ + for i in range(0, deg)) + \ + '\n'.join( + 'ret.v{i} = vec_insert({in0}[{ipd}], ret.v{i}, 1);'. \ + format(i=i, ipd=i + deg, **fmtspec) for i in range(0, deg)) + \ + '\nreturn ret;' + if typ == 'f16': + if deg == 1: + return \ + '''nsimd_{simd_ext}_vf16 ret; + u16 *ptr = (u16 *){in0}; + ret.v0 = vec_splats(nsimd_u16_to_f32(ptr[0])); + ret.v0 = vec_insert(nsimd_u16_to_f32(ptr[1]), ret.v0, 1); + ret.v0 = vec_insert(nsimd_u16_to_f32(ptr[2]), ret.v0, 2); + ret.v0 = vec_insert(nsimd_u16_to_f32(ptr[3]), ret.v0, 3); + ret.v1 = vec_splats(nsimd_u16_to_f32(ptr[4])); + ret.v1 = vec_insert(nsimd_u16_to_f32(ptr[5]), ret.v1, 1); + ret.v1 = vec_insert(nsimd_u16_to_f32(ptr[6]), ret.v1, 2); + ret.v1 = vec_insert(nsimd_u16_to_f32(ptr[7]), ret.v1, 3); + return ret;'''.format(**fmtspec) + else: + ret = '''nsimd_{simd_ext}_vf16x{deg} ret; + u16 *ptr = (u16 *){in0}; + '''.format(deg=deg, **fmtspec) + + for i in range(0, deg): + for k in range(0, 2): + ret += 'ret.v{}.v{} = vec_splats(' \ + 'nsimd_u16_to_f32(ptr[{}]));\n'. \ + format(i, k, i + k * 4 * deg) + for j in range(1, 4): + ret += 'ret.v{i}.v{k} = vec_insert(nsimd_u16_to_f32(' \ + 'ptr[{o}]), ret.v{i}.v{k}, {j});\n'. 
\ + format(i=i, k=k, j=j, + o=i + k * 4 * deg + j * deg) + ret += 'return ret;' + return ret + if deg == 1: + if aligned: + return 'return vec_ld(0, {in0});'.format(**fmtspec) + else: + return 'return *({ppc_typ}*){in0};'. \ + format(ppc_typ=native_type(typ), **fmtspec) + + # From here deg >= 2 + + if aligned: + load = 'nsimd_{simd_ext}_v{typ}x{deg} ret;\n'. \ + format(deg=deg, **fmtspec) + \ + '\n'.join( + 'nsimd_{simd_ext}_v{typ} in{i} = vec_ld({o}, {in0});'. \ + format(i=i, o=i * 16, **fmtspec) for i in range(deg)) + else: + load = \ + 'nsimd_{simd_ext}_v{typ}x{deg} ret;\n'. \ + format(deg=deg, **fmtspec) + \ + '\n'.join( + 'nsimd_{simd_ext}_v{typ} in{i} = *(({ppc_typ}*){in0} + {i});'. \ + format(i=i, ppc_typ=native_type(typ), **fmtspec) \ + for i in range(0, deg)) + if deg == 2: + return '''{load} + ret = nsimd_unzip_{simd_ext}_{typ}(in0, in1); + return ret;'''.format(load=load, **fmtspec) + elif deg == 3: + if typ in ['i32', 'u32', 'f32']: + return \ + '''__vector unsigned char perm1 = NSIMD_PERMUTE_MASK_32( + 0, 3, 6, 0); + + {load} + + nsimd_{simd_ext}_v{typ} tmp0 = vec_perm(in0, in1, perm1); + nsimd_{simd_ext}_v{typ} tmp1 = vec_perm(in1, in2, perm1); + nsimd_{simd_ext}_v{typ} tmp2 = vec_perm(in2, in0, perm1); + + __vector unsigned char perm2 = NSIMD_PERMUTE_MASK_32( + 0, 1, 2, 5); + __vector unsigned char perm3 = NSIMD_PERMUTE_MASK_32( + 5, 0, 1, 2); + __vector unsigned char perm4 = NSIMD_PERMUTE_MASK_32( + 2, 5, 0, 1); + + ret.v0 = vec_perm(tmp0, in2, perm2); + ret.v1 = vec_perm(tmp1, in0, perm3); + ret.v2 = vec_perm(tmp2, in1, perm4); + + return ret;'''.format(load=load, **fmtspec) + elif typ in ['i16', 'u16']: + return \ + '''{load} + + __vector unsigned char permRAB = NSIMD_PERMUTE_MASK_16( + 0, 3, 6, 9, 12, 15, 0, 0); + __vector unsigned char permRDC = NSIMD_PERMUTE_MASK_16( + 0, 1, 2, 3, 4, 5, 10, 13); + + nsimd_{simd_ext}_v{typ} tmp0 = vec_perm(in0, in1, permRAB); + ret.v0 = vec_perm(tmp0, in2, permRDC); + + __vector unsigned char permGAB = NSIMD_PERMUTE_MASK_16( + 1, 4, 7, 10, 13, 0, 0, 0); + __vector unsigned char permGEC = NSIMD_PERMUTE_MASK_16( + 0, 1, 2, 3, 4, 8, 11, 14); + + nsimd_{simd_ext}_v{typ} tmp1 = vec_perm(in0, in1, permGAB); + ret.v1 = vec_perm(tmp1, in2, permGEC); + + __vector unsigned char permBAB = NSIMD_PERMUTE_MASK_16( + 2, 5, 8, 11, 14, 0, 0, 0); + __vector unsigned char permBFC = NSIMD_PERMUTE_MASK_16( + 0, 1, 2, 3, 4, 9, 12, 15); + + nsimd_{simd_ext}_v{typ} tmp2 = vec_perm(in0, in1, permBAB); + ret.v2 = vec_perm(tmp2, in2, permBFC); + + return ret;'''.format(load=load, **fmtspec) + elif typ in ['i8', 'u8']: + return \ + '''{load} + + __vector unsigned char permRAB = NSIMD_PERMUTE_MASK_8( + 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 0, 0, 0, 0, 0); + __vector unsigned char permRDC = NSIMD_PERMUTE_MASK_8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 17, 20, 23, 26, 29); + + nsimd_{simd_ext}_v{typ} tmp0 = vec_perm(in0, in1, permRAB); + ret.v0 = vec_perm(tmp0, in2, permRDC); + + __vector unsigned char permGAB = NSIMD_PERMUTE_MASK_8( + 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 0, 0, 0, 0, 0); + __vector unsigned char permGEC = NSIMD_PERMUTE_MASK_8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 18, 21, 24, 27, 30); + + nsimd_{simd_ext}_v{typ} tmp1 = vec_perm(in0, in1, permGAB); + ret.v1 = vec_perm(tmp1, in2, permGEC); + + __vector unsigned char permBAB = NSIMD_PERMUTE_MASK_8( + 2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 0, 0, 0, 0, 0, 0); + __vector unsigned char permBFC = NSIMD_PERMUTE_MASK_8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 19, 22, 25, 28, 31); + + nsimd_{simd_ext}_v{typ} tmp2 = 
vec_perm(in0, in1, permBAB); + ret.v2 = vec_perm(tmp2, in2, permBFC); + + return ret;'''.format(load=load, **fmtspec) + else: + if typ in ['i32', 'u32', 'f32']: + return \ + '''{load} + + nsimd_{simd_ext}_v{typ} tmp0 = vec_mergeh(in0, in2); + nsimd_{simd_ext}_v{typ} tmp1 = vec_mergel(in0, in2); + nsimd_{simd_ext}_v{typ} tmp2 = vec_mergeh(in1, in3); + nsimd_{simd_ext}_v{typ} tmp3 = vec_mergel(in1, in3); + + ret.v0 = vec_mergeh(tmp0, tmp2); + ret.v1 = vec_mergel(tmp0, tmp2); + ret.v2 = vec_mergeh(tmp1, tmp3); + ret.v3 = vec_mergel(tmp1, tmp3); + + return ret;'''.format(load=load, **fmtspec) + elif typ in ['i16', 'u16']: + return \ + '''{load} + + ret.v0 = vec_mergeh(in0, in2); + ret.v1 = vec_mergel(in0, in2); + ret.v2 = vec_mergeh(in1, in3); + ret.v3 = vec_mergel(in1, in3); + + nsimd_{simd_ext}_v{typ} tmp0 = vec_mergeh(ret.v0, ret.v2); + nsimd_{simd_ext}_v{typ} tmp1 = vec_mergel(ret.v0, ret.v2); + nsimd_{simd_ext}_v{typ} tmp2 = vec_mergeh(ret.v1, ret.v3); + nsimd_{simd_ext}_v{typ} tmp3 = vec_mergel(ret.v1, ret.v3); + + ret.v0 = vec_mergeh(tmp0, tmp2); + ret.v1 = vec_mergel(tmp0, tmp2); + ret.v2 = vec_mergeh(tmp1, tmp3); + ret.v3 = vec_mergel(tmp1, tmp3); + + return ret;'''.format(load=load, **fmtspec) + elif typ in ['i8', 'u8']: + return \ + '''{load} + + nsimd_{simd_ext}_v{typ} tmp0 = vec_mergeh(in0, in2); + nsimd_{simd_ext}_v{typ} tmp1 = vec_mergel(in0, in2); + nsimd_{simd_ext}_v{typ} tmp2 = vec_mergeh(in1, in3); + nsimd_{simd_ext}_v{typ} tmp3 = vec_mergel(in1, in3); + + ret.v0 = vec_mergeh(tmp0, tmp2); + ret.v1 = vec_mergel(tmp0, tmp2); + ret.v2 = vec_mergeh(tmp1, tmp3); + ret.v3 = vec_mergel(tmp1, tmp3); + + tmp0 = vec_mergeh(ret.v0, ret.v2); + tmp1 = vec_mergel(ret.v0, ret.v2); + tmp2 = vec_mergeh(ret.v1, ret.v3); + tmp3 = vec_mergel(ret.v1, ret.v3); + + ret.v0 = vec_mergeh(tmp0, tmp2); + ret.v1 = vec_mergel(tmp0, tmp2); + ret.v2 = vec_mergeh(tmp1, tmp3); + ret.v3 = vec_mergel(tmp1, tmp3); + + return ret;'''.format(load=load, **fmtspec) + +# ----------------------------------------------------------------------------- +# Stores of degree 1, 2, 3 and 4 + +def store1234(simd_ext, typ, deg, aligned): + if typ in ['f64', 'i64', 'u64']: + if simd_ext == 'vmx': + return '\n'.join('{}[{}] = {}.v0;'. \ + format(common.in0, i, common.get_arg(i + 1)) \ + for i in range(deg)) + '\n' + \ + '\n'.join('{}[{}] = {}.v1;'. \ + format(common.in0, i + deg, + common.get_arg(i + 1)) for i in range(deg)) + else: + return '\n'.join('{}[{}] = vec_extract({}, 0);'. \ + format(common.in0, i, common.get_arg(i + 1)) \ + for i in range(deg)) + '\n' + \ + '\n'.join('{}[{}] = vec_extract({}, 1);'. \ + format(common.in0, i + deg, + common.get_arg(i + 1)) for i in range(deg)) + if typ == 'f16': + if deg == 1: + return \ + '''u16 *ptr = (u16 *){in0}; + ptr[0] = nsimd_f32_to_u16(vec_extract({in1}.v0, 0)); + ptr[1] = nsimd_f32_to_u16(vec_extract({in1}.v0, 1)); + ptr[2] = nsimd_f32_to_u16(vec_extract({in1}.v0, 2)); + ptr[3] = nsimd_f32_to_u16(vec_extract({in1}.v0, 3)); + ptr[4] = nsimd_f32_to_u16(vec_extract({in1}.v1, 0)); + ptr[5] = nsimd_f32_to_u16(vec_extract({in1}.v1, 1)); + ptr[6] = nsimd_f32_to_u16(vec_extract({in1}.v1, 2)); + ptr[7] = nsimd_f32_to_u16(vec_extract({in1}.v1, 3));'''. \ + format(**fmtspec) + else: + ret = 'u16 *ptr = (u16 *){in0};\n'.format(**fmtspec) + for i in range(0, deg): + for k in range(0, 2): + for j in range(0, 4): + ret += 'ptr[{o}] = nsimd_f32_to_u16(' \ + 'vec_extract({a}.v{k}, {j}));\n'. 
\ + format(a=common.get_arg(i + 1), j=j, k=k, + o=i + k * 4 * deg + j * deg, **fmtspec) + return ret + if deg == 1: + if aligned: + return 'vec_st({in1}, 0, {in0});'.format(**fmtspec) + else: + return '*({ppc_typ} *){in0} = {in1};'. \ + format(ppc_typ=native_type(typ), **fmtspec) + + # From here deg >= 2 + + if aligned: + store = '\n'.join('vec_st(ret{i}, {o}, {in0});'. \ + format(i=i, o=i * 16, **fmtspec) \ + for i in range(0, deg)) + else: + store = '\n'.join('*({ppc_typ} *)({in0} + {o}) = ret{i};'. \ + format(o=i * get_len(typ), ppc_typ=native_type(typ), + i=i, **fmtspec) for i in range(deg)) + if deg == 2: + return \ + '''nsimd_{simd_ext}_v{typ} ret0 = vec_mergeh({in1}, {in2}); + nsimd_{simd_ext}_v{typ} ret1 = vec_mergel({in1}, {in2}); + + {store}'''.format(store=store, **fmtspec) + elif deg == 3: + if typ in ['i32', 'u32', 'f32']: + return \ + '''__vector unsigned char perm1 = NSIMD_PERMUTE_MASK_32( + 0, 2, 4, 6); + __vector unsigned char perm2 = NSIMD_PERMUTE_MASK_32( + 0, 2, 5, 7); + __vector unsigned char perm3 = NSIMD_PERMUTE_MASK_32( + 1, 3, 5, 7); + + nsimd_{simd_ext}_v{typ} tmp0 = vec_perm({in1}, {in2}, perm1); + nsimd_{simd_ext}_v{typ} tmp1 = vec_perm({in3}, {in1}, perm2); + nsimd_{simd_ext}_v{typ} tmp2 = vec_perm({in2}, {in3}, perm3); + + nsimd_{simd_ext}_v{typ} ret0 = vec_perm(tmp0, tmp1, perm1); + nsimd_{simd_ext}_v{typ} ret1 = vec_perm(tmp2, tmp0, perm2); + nsimd_{simd_ext}_v{typ} ret2 = vec_perm(tmp1, tmp2, perm3); + + {store}'''.format(store=store, **fmtspec) + elif typ in ['i16', 'u16']: + return \ + '''__vector unsigned char permARG = NSIMD_PERMUTE_MASK_16( + 0, 8, 0, 1, 9, 0, 2, 10); + __vector unsigned char permAXB = NSIMD_PERMUTE_MASK_16( + 0, 1, 8, 3, 4, 9, 6, 7); + + nsimd_{simd_ext}_v{typ} tmp0 = vec_perm({in1}, {in2}, permARG); + nsimd_{simd_ext}_v{typ} ret0 = vec_perm(tmp0, {in3}, permAXB); + + __vector unsigned char permBRG = NSIMD_PERMUTE_MASK_16( + 0, 3, 11, 0, 4, 12, 0, 5); + __vector unsigned char permBYB = NSIMD_PERMUTE_MASK_16( + 10, 1, 2, 11, 4, 5, 12, 7); + + nsimd_{simd_ext}_v{typ} tmp1 = vec_perm({in1}, {in2}, permBRG); + nsimd_{simd_ext}_v{typ} ret1 = vec_perm(tmp1, {in3}, permBYB); + + __vector unsigned char permCRG = NSIMD_PERMUTE_MASK_16( + 13, 0, 6, 14, 0, 7, 15, 0); + __vector unsigned char permCZB = NSIMD_PERMUTE_MASK_16( + 0, 13, 2, 3, 14, 5, 6, 15); + + nsimd_{simd_ext}_v{typ} tmp2 = vec_perm({in1}, {in2}, permCRG); + nsimd_{simd_ext}_v{typ} ret2 = vec_perm(tmp2, {in3}, permCZB); + + {store}'''.format(store=store, **fmtspec) + elif typ in ['i8', 'u8']: + return \ + '''__vector unsigned char mARG = NSIMD_PERMUTE_MASK_8( + 0, 16, 0, 1, 17, 0, 2, 18, 0, 3, 19, 0, 4, 20, 0, 5); + __vector unsigned char mAXB = NSIMD_PERMUTE_MASK_8( + 0, 1, 16, 3, 4, 17, 6, 7, 18, 9, 10, 19, 12, 13, 20, 15); + + nsimd_{simd_ext}_v{typ} tmp0 = vec_perm({in1}, {in2}, mARG); + nsimd_{simd_ext}_v{typ} ret0 = vec_perm(tmp0, {in3}, mAXB); + + __vector unsigned char mBRG = NSIMD_PERMUTE_MASK_8( + 21, 0, 6, 22, 0, 7, 23, 0, 8, 24, 0, 9, 25, 0, 10, 26); + __vector unsigned char mBYB = NSIMD_PERMUTE_MASK_8( + 0, 21, 2, 3, 22, 5, 6, 23, 8, 9, 24, 11, 12, 25, 14, 15); + + nsimd_{simd_ext}_v{typ} tmp1 = vec_perm({in1}, {in2}, mBRG); + nsimd_{simd_ext}_v{typ} ret1 = vec_perm(tmp1, {in3}, mBYB); + + __vector unsigned char mCRG = NSIMD_PERMUTE_MASK_8( + 0, 11, 27, 0, 12, 28, 0, 13, 29, 0, 14, 30, 0, 15, 31, 0); + __vector unsigned char mCZB = NSIMD_PERMUTE_MASK_8( + 26, 1, 2, 27, 4, 5, 28, 7, 8, 29, 10, 11, 30, 13, 14, 31); + + nsimd_{simd_ext}_v{typ} tmp2 = vec_perm({in1}, 
{in2}, mCRG); + nsimd_{simd_ext}_v{typ} ret2 = vec_perm(tmp2, {in3}, mCZB); + + {store}'''.format(store=store, **fmtspec) + else: + if typ in ['i32', 'u32', 'f32']: + return \ + '''nsimd_{simd_ext}_v{typ} tmp0 = vec_mergeh({in1}, {in3}); + nsimd_{simd_ext}_v{typ} tmp1 = vec_mergel({in1}, {in3}); + nsimd_{simd_ext}_v{typ} tmp2 = vec_mergeh({in2}, {in4}); + nsimd_{simd_ext}_v{typ} tmp3 = vec_mergel({in2}, {in4}); + + nsimd_{simd_ext}_v{typ} ret0 = vec_mergeh(tmp0, tmp2); + nsimd_{simd_ext}_v{typ} ret1 = vec_mergel(tmp0, tmp2); + nsimd_{simd_ext}_v{typ} ret2 = vec_mergeh(tmp1, tmp3); + nsimd_{simd_ext}_v{typ} ret3 = vec_mergel(tmp1, tmp3); + + {store}'''.format(store=store, **fmtspec) + elif typ in ['i16', 'u16']: + return \ + '''nsimd_{simd_ext}_v{typ} tmp0 = vec_mergeh({in1}, {in3}); + nsimd_{simd_ext}_v{typ} tmp1 = vec_mergel({in1}, {in3}); + nsimd_{simd_ext}_v{typ} tmp2 = vec_mergeh({in2}, {in4}); + nsimd_{simd_ext}_v{typ} tmp3 = vec_mergel({in2}, {in4}); + + nsimd_{simd_ext}_v{typ} ret0 = vec_mergeh(tmp0, tmp2); + nsimd_{simd_ext}_v{typ} ret1 = vec_mergel(tmp0, tmp2); + nsimd_{simd_ext}_v{typ} ret2 = vec_mergeh(tmp1, tmp3); + nsimd_{simd_ext}_v{typ} ret3 = vec_mergel(tmp1, tmp3); + + {store}'''.format(store=store, **fmtspec) + elif typ in ['i8', 'u8']: + return \ + '''nsimd_{simd_ext}_v{typ} tmp0 = vec_mergeh({in1}, {in3}); + nsimd_{simd_ext}_v{typ} tmp1 = vec_mergel({in1}, {in3}); + nsimd_{simd_ext}_v{typ} tmp2 = vec_mergeh({in2}, {in4}); + nsimd_{simd_ext}_v{typ} tmp3 = vec_mergel({in2}, {in4}); + + nsimd_{simd_ext}_v{typ} ret0 = vec_mergeh(tmp0, tmp2); + nsimd_{simd_ext}_v{typ} ret1 = vec_mergel(tmp0, tmp2); + nsimd_{simd_ext}_v{typ} ret2 = vec_mergeh(tmp1, tmp3); + nsimd_{simd_ext}_v{typ} ret3 = vec_mergel(tmp1, tmp3); + + {store}'''.format(store=store, **fmtspec) + +# ----------------------------------------------------------------------------- +# Length + +def len1(simd_ext, typ): + return 'return {};'.format(128 // int(typ[1:])) + +# ----------------------------------------------------------------------------- +# Other helper functions + +def simple_op2(op, simd_ext, typ): + if has_to_be_emulated(simd_ext, typ): + return emulation_code(op, simd_ext, typ, ['v', 'v', 'v']) + return 'return vec_{op}({in0}, {in1});'.format(op=op, **fmtspec) + +# Binary operators: and, or, xor, andnot +def binary_op2(op, simd_ext, typ): + if has_to_be_emulated(simd_ext, typ): + return emulation_code(op, simd_ext, typ, ['v', 'v', 'v']) + else: + ppcop = {'orb': 'or', 'xorb': 'xor', 'andb': 'and', 'andnotb': 'andc'} + return 'return vec_{op}({in0}, {in1});'.format(op=ppcop[op], **fmtspec) + +# Logical operators: and, or, xor, andnot +def logical_op2(op, simd_ext, typ): + if has_to_be_emulated(simd_ext, typ): + return emulation_code(op, simd_ext, typ, ['l', 'l', 'l']) + ppcop = {'orl': 'or', 'xorl': 'xor', 'andl': 'and', 'andnotl': 'andc'} + return 'return vec_{op}({in0}, {in1});'.format(op=ppcop[op], **fmtspec) + +# ----------------------------------------------------------------------------- + +def div2(simd_ext, typ): + if has_to_be_emulated(simd_ext, typ): + return emulation_code('div', simd_ext, typ, ['v', 'v', 'v']) + elif typ in common.ftypes: + return 'return vec_div({in0}, {in1});'.format(**fmtspec) + elif typ in common.iutypes: + return '''nsimd_{simd_ext}_v{typ} ret; + ret = vec_splats(({typ})(vec_extract({in0}, 0) / + vec_extract({in1}, 0))); + '''.format(**fmtspec) + \ + '\n'.join( + '''ret = vec_insert(({typ})(vec_extract({in0}, {i}) / + vec_extract({in1}, {i})), ret, {i});'''. 
\ + format(i=i, **fmtspec) \ + for i in range(get_len(typ))) + \ + '\nreturn ret;' + +# ----------------------------------------------------------------------------- + +def not1(simd_ext, typ): + if has_to_be_emulated(simd_ext, typ): + return emulation_code('notb', simd_ext, typ, ['v', 'v']) + return 'return vec_nor({in0}, {in0});'.format(**fmtspec) + +# ----------------------------------------------------------------------------- + +def lnot1(simd_ext, typ): + if has_to_be_emulated(simd_ext, typ): + return emulation_code('notl', simd_ext, typ, ['l', 'l']) + return 'return vec_nor({in0}, {in0});'.format(**fmtspec) + +# ----------------------------------------------------------------------------- + +def sqrt1(simd_ext, typ): + if has_to_be_emulated(simd_ext, typ): + return emulation_code('sqrt', simd_ext, typ, ['v', 'v']) + return 'return vec_sqrt({in0});'.format(**fmtspec) + +# ----------------------------------------------------------------------------- + +def shift2(op, simd_ext, typ): + if has_to_be_emulated(simd_ext, typ): + return emulation_code(op, simd_ext, typ, ['v', 'v', 'p']) + return 'return vec_{ppcop}({in0}, vec_splats((u{typnbits}){in1}));'. \ + format(ppcop={'shl': 'sl', 'shr': 'sr', 'shra': 'sra'}[op], + **fmtspec) + +# ----------------------------------------------------------------------------- + +def set1(simd_ext, typ): + if typ == 'f16': + return '''nsimd_{simd_ext}_vf16 ret; + f32 tmp = nsimd_f16_to_f32({in0}); + ret.v0 = vec_splats(tmp); + ret.v1 = ret.v0; + return ret;'''.format(**fmtspec) + elif has_to_be_emulated(simd_ext, typ): + return '''nsimd_{simd_ext}_v{typ} ret; + ret.v0 = {in0}; + ret.v1 = {in0}; + return ret;'''.format(**fmtspec) + else: + return 'return vec_splats({in0});'.format(**fmtspec) + +# ----------------------------------------------------------------------------- + +def lset1(simd_ext, typ): + if typ == 'f16': + return \ + '''nsimd_{simd_ext}_vlf16 ret; + ret.v0 = (__vector __bool int)vec_splats((u32)({in0} ? -1 : 0)); + ret.v1 = ret.v0; + return ret;'''.format(**fmtspec) + elif has_to_be_emulated(simd_ext, typ): + return '''nsimd_{simd_ext}_vl{typ} ret; + ret.v0 = (u64)({in0} ? -1 : 0); + ret.v1 = (u64)({in0} ? -1 : 0); + return ret;'''.format(**fmtspec) + else: + return '''if ({in0}) {{ + return ({ppc_typ})vec_splats((u{typnbits})-1); + }} else {{ + return {lzeros}; + }}'''.format(ppc_typ=native_typel(typ), **fmtspec) + +# ----------------------------------------------------------------------------- + +def cmp2(op, simd_ext, typ): + if has_to_be_emulated(simd_ext, typ): + return emulation_code(op, simd_ext, typ, ['l', 'v', 'v']) + elif typ in common.iutypes: + if op == 'ne': + return '''nsimd_{simd_ext}_vl{typ} tmp; + tmp = vec_cmpeq({in0}, {in1}); + return vec_nor(tmp, tmp);'''.format(op=op, **fmtspec) + else: + return 'return vec_cmp{op}({in0}, {in1});'.format(op=op, **fmtspec) + else: + return emulate_with_scalar(op, simd_ext, typ, ['l', 'v', 'v']) + +# ----------------------------------------------------------------------------- + +def if_else3(simd_ext, typ): + if typ == 'f16': + return emulate_f16('if_else1', simd_ext, ['v', 'l', 'v', 'v']) + elif has_to_be_emulated(simd_ext, typ): + return '''nsimd_{simd_ext}_v{typ} ret; + ret.v0 = ({in0}.v0 ? {in1}.v0 : {in2}.v0); + ret.v1 = ({in0}.v1 ? 
{in1}.v1 : {in2}.v1); + return ret;'''.format(**fmtspec) + return 'return vec_sel({in2}, {in1}, {in0});'.format(**fmtspec) + +# ----------------------------------------------------------------------------- + +def minmax2(op, simd_ext, typ): + if has_to_be_emulated(simd_ext, typ): + return emulation_code(op, simd_ext, typ, ['v', 'v', 'v']) + return 'return vec_{op}({in0}, {in1});'.format(op=op, **fmtspec) + +# ----------------------------------------------------------------------------- + +def abs1(simd_ext, typ): + if typ in common.utypes: + return 'return {in0};'.format(**fmtspec) + elif has_to_be_emulated(simd_ext, typ): + return emulation_code('abs', simd_ext, typ, ['v', 'v']) + return 'return vec_abs({in0});'.format(**fmtspec) + +# ----------------------------------------------------------------------------- + +def round1(op, simd_ext, typ): + if typ in common.iutypes: + return 'return {in0};'.format(**fmtspec) + elif has_to_be_emulated(simd_ext, typ): + return emulation_code(op, simd_ext, typ, ['v', 'v']) + if op == 'round_to_even': + return emulate_with_scalar('round_to_even', simd_ext, typ, ['v', 'v']) + ppcop = { 'trunc': 'trunc', 'ceil': 'ceil', 'floor': 'floor' } + return 'return vec_{op}({in0});'.format(op=ppcop[op], **fmtspec) + +# ----------------------------------------------------------------------------- + +def fma(op, simd_ext, typ): + if has_to_be_emulated(simd_ext, typ): + return emulation_code(op, simd_ext, typ, ['v', 'v', 'v', 'v']) + elif typ in common.iutypes: + if op == 'fma': + return \ + 'return vec_add(vec_mul({in0}, {in1}), {in2});'.format(**fmtspec) + elif op == 'fms': + return \ + 'return vec_sub(vec_mul({in0}, {in1}), {in2});'.format(**fmtspec) + elif op == 'fnma': + return \ + 'return vec_sub({in2}, vec_mul({in0}, {in1}));'.format(**fmtspec) + elif op == 'fnms': + return '''return vec_sub(nsimd_neg_{simd_ext}_{typ}({in2}), + vec_mul({in0}, {in1}));'''.format(**fmtspec) + elif typ in common.ftypes: + ppcop = { 'fma': 'vec_madd', 'fms': 'vec_msub', 'fnms': 'vec_nmadd', + 'fnma': 'vec_nmsub' } + return 'return {ppcop}({in0}, {in1}, {in2});'. \ + format(ppcop=ppcop[op], **fmtspec) + +# ----------------------------------------------------------------------------- + +def neg1(simd_ext, typ): + if has_to_be_emulated(simd_ext, typ): + return emulation_code('neg', simd_ext, typ, ['v', 'v']) + elif typ in common.itypes or typ in common.ftypes: + return 'return vec_neg({in0});'.format(**fmtspec) + else: + return 'return vec_sub({zeros}, {in0});'.format(**fmtspec) + +# ----------------------------------------------------------------------------- + +def recs1(op, simd_ext, typ): + if has_to_be_emulated(simd_ext, typ): + return emulation_code(op, simd_ext, typ, ['v', 'v']) + elif op == 'rec': + return 'return vec_div(vec_splats(({typ})1), {in0});'. \ + format(**fmtspec) + elif op in ['rec8', 'rec11']: + return 'return vec_re({in0});'.format(**fmtspec) + elif op in ['rsqrt8', 'rsqrt11']: + return 'return vec_rsqrte({in0});'.format(**fmtspec) + +# ----------------------------------------------------------------------------- + +def loadl(aligned, simd_ext, typ): + return \ + '''/* This can surely be improved but it is not our priority. */ + return nsimd_notl_{simd_ext}_{typ}(nsimd_eq_{simd_ext}_{typ}( + nsimd_load{align}_{simd_ext}_{typ}( + {in0}), nsimd_set1_{simd_ext}_{typ}({zero})));'''. 
\ + format(align='a' if aligned else 'u', + zero='nsimd_f32_to_f16(0.0f)' if typ == 'f16' + else '({})0'.format(typ), **fmtspec) + +# ----------------------------------------------------------------------------- + +def storel(aligned, simd_ext, typ): + return \ + '''/* This can surely be improved but it is not our priority. */ + nsimd_store{align}_{simd_ext}_{typ}({in0}, + nsimd_if_else1_{simd_ext}_{typ}({in1}, + nsimd_set1_{simd_ext}_{typ}({one}), + nsimd_set1_{simd_ext}_{typ}({zero})));'''. \ + format(align='a' if aligned else 'u', + one='nsimd_f32_to_f16(1.0f)' if typ == 'f16' + else '({})1'.format(typ), + zero='nsimd_f32_to_f16(0.0f)' if typ == 'f16' + else '({})0'.format(typ), **fmtspec) + +# ----------------------------------------------------------------------------- + +def allany1(op, simd_ext, typ): + binop = '&&' if op == 'all' else '||' + if typ == 'f16': + return \ + '''return nsimd_{op}_{simd_ext}_f32({in0}.v0) {binop} + nsimd_{op}_{simd_ext}_f32({in0}.v1);'''. \ + format(op=op, binop=binop, **fmtspec) + elif has_to_be_emulated(simd_ext, typ): + return 'return {in0}.v0 {binop} {in0}.v1;'. \ + format(binop=binop, **fmtspec) + return 'return vec_{op}_ne({in0}, ({lzeros}));'.format(op=op, **fmtspec) + +# ----------------------------------------------------------------------------- + +def nbtrue1(simd_ext, typ): + if typ == 'f16': + return \ + '''return nsimd_nbtrue_{simd_ext}_f32({in0}.v0) + + nsimd_nbtrue_{simd_ext}_f32({in0}.v1);'''. \ + format(**fmtspec) + elif has_to_be_emulated(simd_ext, typ): + return 'return -(int)((i64)({in0}.v0) + (i64)({in0}.v1));'. \ + format(**fmtspec) + return 'return {};'. \ + format(' + '.join('(vec_extract({in0}, {i}) ? 1 : 0)'. \ + format(i=i, **fmtspec) \ + for i in range(get_len(typ)))) + +# ----------------------------------------------------------------------------- + +def reinterpretl1(simd_ext, from_typ, to_typ): + if from_typ == to_typ: + return 'return {in0};'.format(**fmtspec) + elif simd_ext == 'vmx' and from_typ in ['f64', 'i64', 'u64']: + return \ + '''nsimd_{simd_ext}_vl{to_typ} ret; + ret.v0 = {in0}.v0; + ret.v1 = {in0}.v1; + return ret;'''.format(**fmtspec) + elif from_typ == 'f16': + return \ + '''nsimd_{simd_ext}_vl{to_typ} ret = + (__vector __bool short)vec_splats( + (u16)vec_extract({in0}.v0, 0)); + ret = (__vector __bool short)vec_insert( + (u16)vec_extract({in0}.v0, 1), ret, 1); + ret = (__vector __bool short)vec_insert( + (u16)vec_extract({in0}.v0, 2), ret, 2); + ret = (__vector __bool short)vec_insert( + (u16)vec_extract({in0}.v0, 3), ret, 3); + ret = (__vector __bool short)vec_insert( + (u16)vec_extract({in0}.v1, 0), ret, 4); + ret = (__vector __bool short)vec_insert( + (u16)vec_extract({in0}.v1, 1), ret, 5); + ret = (__vector __bool short)vec_insert( + (u16)vec_extract({in0}.v1, 2), ret, 6); + ret = (__vector __bool short)vec_insert( + (u16)vec_extract({in0}.v1, 3), ret, 7); + return ret;'''.format(**fmtspec) + elif to_typ == 'f16': + return \ + '''nsimd_{simd_ext}_vlf16 ret; + ret.v0 = (__vector __bool int)vec_splats( + (u32)(vec_extract({in0}, 0) ? -1 : 0)); + ret.v0 = (__vector __bool int)vec_insert( + (u32)(vec_extract({in0}, 1) ? -1 : 0), ret.v0, 1); + ret.v0 = (__vector __bool int)vec_insert( + (u32)(vec_extract({in0}, 2) ? -1 : 0), ret.v0, 2); + ret.v0 = (__vector __bool int)vec_insert( + (u32)(vec_extract({in0}, 3) ? -1 : 0), ret.v0, 3); + ret.v1 = (__vector __bool int)vec_splats( + (u32)(vec_extract({in0}, 4) ? -1 : 0)); + ret.v1 = (__vector __bool int)vec_insert( + (u32)(vec_extract({in0}, 5) ? 
-1 : 0), ret.v1, 1); + ret.v1 = (__vector __bool int)vec_insert( + (u32)(vec_extract({in0}, 6) ? -1 : 0), ret.v1, 2); + ret.v1 = (__vector __bool int)vec_insert( + (u32)(vec_extract({in0}, 7) ? -1 : 0), ret.v1, 3); + return ret;'''.format(**fmtspec) + else: + return 'return ({ppc_to_typ}){in0};'. \ + format(ppc_to_typ=native_typel(to_typ), **fmtspec) + +# ----------------------------------------------------------------------------- + +def convert1(simd_ext, from_typ, to_typ): + if from_typ == to_typ: + return 'return {in0};'.format(**fmtspec) + elif from_typ == 'f16' and to_typ == 'u16': + return \ + '''return vec_pack((__vector unsigned int)vec_ctu({in0}.v0, 0), + (__vector unsigned int)vec_ctu({in0}.v1, 0));'''. \ + format(**fmtspec) + elif from_typ == 'f16' and to_typ == 'i16': + return \ + '''return vec_pack((__vector signed int)vec_cts({in0}.v0, 0), + (__vector signed int)vec_cts({in0}.v1, 0));'''. \ + format(**fmtspec) + elif from_typ == 'u16' and to_typ == 'f16': + return \ + '''nsimd_{simd_ext}_vf16 ret; + /* Unpack extends the sign, we need to remove the extra 1s */ + __vector int mask = vec_splats((int)0xFFFF); + ret.v0 = vec_ctf(vec_and(vec_unpackh((__vector short){in0}), mask), + 0); + ret.v1 = vec_ctf(vec_and(vec_unpackl((__vector short){in0}), mask), + 0); + return ret;'''.format(**fmtspec) + elif from_typ == 'i16' and to_typ == 'f16': + return '''nsimd_{simd_ext}_vf16 ret; + ret.v0 = vec_ctf(vec_unpackh({in0}), 0); + ret.v1 = vec_ctf(vec_unpackl({in0}), 0); + return ret;'''.format(**fmtspec) + elif has_to_be_emulated(simd_ext, to_typ): + return '''nsimd_{simd_ext}_v{to_typ} ret; + ret.v0 = nsimd_scalar_cvt_{to_typ}_{from_typ}({in0}.v0); + ret.v1 = nsimd_scalar_cvt_{to_typ}_{from_typ}({in0}.v1); + return ret;'''.format(**fmtspec) + elif from_typ in ['f32', 'f64'] and to_typ in ['i32', 'i64']: + return 'return vec_cts({in0}, 0);'.format(**fmtspec) + elif from_typ in ['f32', 'f64'] and to_typ in ['u32', 'u64']: + return 'return vec_ctu({in0}, 0);'.format(**fmtspec) + elif from_typ in ['i32', 'i64', 'u32', 'u64'] and to_typ in ['f32', 'f64']: + return 'return vec_ctf({in0}, 0);'.format(**fmtspec) + elif from_typ in common.iutypes and to_typ in common.iutypes: + return 'return ({ppctyp}){in0};'. 
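
# Illustrative sketch (not nsimd code) of the sign-extension issue handled
# above: unpacking a 16-bit lane to 32 bits extends the sign, so for u16
# inputs the extra one bits must be cleared again with a 0xFFFF mask.
def _demo_unpack_mask():
    x = 0xFFFE                                        # a u16 lane value
    sign_extended = x - 0x10000 if x & 0x8000 else x  # what unpack yields
    assert sign_extended == -2
    assert sign_extended & 0xFFFF == x                # the mask restores it
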
\ + format(ppctyp=native_type(to_typ), **fmtspec) + +# ----------------------------------------------------------------------------- + +def reinterpret1(simd_ext, from_typ, to_typ): + if from_typ == to_typ: + return 'return {in0};'.format(**fmtspec) + elif simd_ext == 'vmx' and from_typ in ['f64', 'i64', 'u64']: + return \ + '''nsimd_{simd_ext}_v{to_typ} ret; + ret.v0 = nsimd_scalar_reinterpret_{to_typ}_{from_typ}({in0}.v0); + ret.v1 = nsimd_scalar_reinterpret_{to_typ}_{from_typ}({in0}.v1); + return ret;'''.format(**fmtspec) + elif from_typ == 'f16' and to_typ == 'u16': + return \ + '''nsimd_{simd_ext}_vu16 ret; + ret = vec_splats(nsimd_f32_to_u16(vec_extract({in0}.v0, 0))); + ret = vec_insert(nsimd_f32_to_u16(vec_extract({in0}.v0, 1)), + ret, 1); + ret = vec_insert(nsimd_f32_to_u16(vec_extract({in0}.v0, 2)), + ret, 2); + ret = vec_insert(nsimd_f32_to_u16(vec_extract({in0}.v0, 3)), + ret, 3); + ret = vec_insert(nsimd_f32_to_u16(vec_extract({in0}.v1, 0)), + ret, 4); + ret = vec_insert(nsimd_f32_to_u16(vec_extract({in0}.v1, 1)), + ret, 5); + ret = vec_insert(nsimd_f32_to_u16(vec_extract({in0}.v1, 2)), + ret, 6); + ret = vec_insert(nsimd_f32_to_u16(vec_extract({in0}.v1, 3)), + ret, 7); + return ret;'''.format(**fmtspec) + elif from_typ == 'f16' and to_typ == 'i16': + return \ + '''nsimd_{simd_ext}_vi16 ret; + ret = vec_splats(nsimd_scalar_reinterpret_i16_u16( + nsimd_f32_to_u16(vec_extract({in0}.v0, 0)))); + ret = vec_insert(nsimd_scalar_reinterpret_i16_u16( + nsimd_f32_to_u16(vec_extract({in0}.v0, 1))), ret, 1); + ret = vec_insert(nsimd_scalar_reinterpret_i16_u16( + nsimd_f32_to_u16(vec_extract({in0}.v0, 2))), ret, 2); + ret = vec_insert(nsimd_scalar_reinterpret_i16_u16( + nsimd_f32_to_u16(vec_extract({in0}.v0, 3))), ret, 3); + ret = vec_insert(nsimd_scalar_reinterpret_i16_u16( + nsimd_f32_to_u16(vec_extract({in0}.v1, 0))), ret, 4); + ret = vec_insert(nsimd_scalar_reinterpret_i16_u16( + nsimd_f32_to_u16(vec_extract({in0}.v1, 1))), ret, 5); + ret = vec_insert(nsimd_scalar_reinterpret_i16_u16( + nsimd_f32_to_u16(vec_extract({in0}.v1, 2))), ret, 6); + ret = vec_insert(nsimd_scalar_reinterpret_i16_u16( + nsimd_f32_to_u16(vec_extract({in0}.v1, 3))), ret, 7); + return ret;'''.format(**fmtspec) + elif from_typ == 'u16' and to_typ == 'f16': + return \ + '''nsimd_{simd_ext}_vf16 ret; + ret.v0 = vec_splats(nsimd_u16_to_f32(vec_extract({in0}, 0))); + ret.v0 = vec_insert(nsimd_u16_to_f32(vec_extract({in0}, 1)), + ret.v0, 1); + ret.v0 = vec_insert(nsimd_u16_to_f32(vec_extract({in0}, 2)), + ret.v0, 2); + ret.v0 = vec_insert(nsimd_u16_to_f32(vec_extract({in0}, 3)), + ret.v0, 3); + ret.v1 = vec_splats(nsimd_u16_to_f32(vec_extract({in0}, 4))); + ret.v1 = vec_insert(nsimd_u16_to_f32(vec_extract({in0}, 5)), + ret.v1, 1); + ret.v1 = vec_insert(nsimd_u16_to_f32(vec_extract({in0}, 6)), + ret.v1, 2); + ret.v1 = vec_insert(nsimd_u16_to_f32(vec_extract({in0}, 7)), + ret.v1, 3); + return ret;'''.format(**fmtspec) + elif from_typ == 'i16' and to_typ == 'f16': + return \ + '''nsimd_{simd_ext}_vf16 ret; + ret.v0 = vec_splats(nsimd_u16_to_f32( + nsimd_scalar_reinterpret_u16_i16( + vec_extract({in0}, 0)))); + ret.v0 = vec_insert(nsimd_u16_to_f32( + nsimd_scalar_reinterpret_u16_i16( + vec_extract({in0}, 1))), ret.v0, 1); + ret.v0 = vec_insert(nsimd_u16_to_f32( + nsimd_scalar_reinterpret_u16_i16( + vec_extract({in0}, 2))), ret.v0, 2); + ret.v0 = vec_insert(nsimd_u16_to_f32( + nsimd_scalar_reinterpret_u16_i16( + vec_extract({in0}, 3))), ret.v0, 3); + ret.v1 = vec_splats(nsimd_u16_to_f32( + nsimd_scalar_reinterpret_u16_i16( + 
vec_extract({in0}, 4)))); + ret.v1 = vec_insert(nsimd_u16_to_f32( + nsimd_scalar_reinterpret_u16_i16( + vec_extract({in0}, 5))), ret.v1, 1); + ret.v1 = vec_insert(nsimd_u16_to_f32( + nsimd_scalar_reinterpret_u16_i16( + vec_extract({in0}, 6))), ret.v1, 2); + ret.v1 = vec_insert(nsimd_u16_to_f32( + nsimd_scalar_reinterpret_u16_i16( + vec_extract({in0}, 7))), ret.v1, 3); + return ret;'''.format(**fmtspec) + else: + return 'return ({ppc_typ}){in0};'. \ + format(ppc_typ=native_type(to_typ), **fmtspec) + +# ----------------------------------------------------------------------------- + +def reverse1(simd_ext, typ): + if typ == 'f16': + return emulate_f16('reverse', simd_ext, ['v', 'v']) + elif has_to_be_emulated(simd_ext, typ): + return '''nsimd_{simd_ext}_v{typ} ret; + ret.v0 = {in0}.v1; + ret.v1 = {in0}.v0; + return ret;'''.format(**fmtspec) + elif typ in ['i8', 'u8']: + return '''return vec_perm({in0}, {in0}, (__vector unsigned char) + {{ 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0 }});'''. \ + format(**fmtspec) + elif typ in ['i16', 'u16']: + return '''return vec_perm({in0}, {in0}, (__vector unsigned char) + {{ 14, 15, 12, 13, 10, 11, 8, 9, + 6, 7, 4, 5, 2, 3, 0, 1 }});'''. \ + format(**fmtspec) + elif typ in ['i32', 'u32', 'f32']: + return '''return vec_perm({in0}, {in0}, (__vector unsigned char) + {{ 12, 13, 14, 15, 8, 9, 10, 11, + 4, 5, 6, 7, 0, 1, 2, 3 }});'''. \ + format(**fmtspec) + elif typ in ['f64', 'i64', 'u64']: + return '''return vec_perm({in0}, {in0}, (__vector unsigned char) + {{ 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7 }});'''. \ + format(**fmtspec) + +# ----------------------------------------------------------------------------- + +def addv(simd_ext, typ): + if typ == 'f16': + return '''return nsimd_f32_to_f16( + nsimd_addv_{simd_ext}_f32({in0}.v0) + + nsimd_addv_{simd_ext}_f32({in0}.v1));'''.format(**fmtspec) + elif has_to_be_emulated(simd_ext, typ): + return 'return {in0}.v0 + {in0}.v1;'.format(**fmtspec) + return 'return ({})({});'. \ + format(typ, ' + '.join('vec_extract({in0}, {i})'. 
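
# Illustrative sketch (not nsimd code) of the vec_perm patterns above:
# element i of a 16-byte vector occupies bytes [sz * i, sz * (i + 1)), so
# reversing elements reverses lane order while keeping the byte order
# within each lane.
def _demo_reverse_perm(sz):
    n = 16 // sz
    return [sz * (n - 1 - i) + j for i in range(n) for j in range(sz)]

assert _demo_reverse_perm(4) == [12, 13, 14, 15, 8, 9, 10, 11,
                                 4, 5, 6, 7, 0, 1, 2, 3]
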
\ + format(i=i, **fmtspec) \ + for i in range(get_len(typ)))) + +# ----------------------------------------------------------------------------- + +def add_sub_s(op, simd_ext, typ): + if has_to_be_emulated(simd_ext, typ): + return emulation_code(op, simd_ext, typ, ['v', 'v', 'v']) + if typ in common.ftypes: + return 'return vec_{op}({in0}, {in1});'.format(op=op[:-1], **fmtspec) + elif typ in ['i64', 'u64']: + return '''nsimd_{simd_ext}_v{typ} ret; + ret = vec_splats(nsimd_scalar_{op}_{typ}( + vec_extract({in0}, 0), vec_extract({in1}, 0))); + ret = vec_insert(nsimd_scalar_{op}_{typ}( + vec_extract({in0}, 1), vec_extract({in1}, 1)), + ret, 1); + return ret;'''.format(op=op, **fmtspec) + return 'return vec_{op}({in0}, {in1});'.format(op=op, **fmtspec) + +# ----------------------------------------------------------------------------- + +def upcvt1(simd_ext, from_typ, to_typ): + if from_typ in ['i8', 'u8'] and to_typ == 'f16': + return '''nsimd_{simd_ext}_vf16x2 ret; + nsimd_{simd_ext}_vi16x2 tmp; + tmp = nsimd_upcvt_{simd_ext}_i16_{from_typ}(a0); + ret.v0 = nsimd_cvt_{simd_ext}_f16_i16(tmp.v0); + ret.v1 = nsimd_cvt_{simd_ext}_f16_i16(tmp.v1); + return ret;'''.format(**fmtspec) + elif from_typ == 'f16' and to_typ == 'f32': + return '''nsimd_{simd_ext}_v{to_typ}x2 ret; + ret.v0 = {in0}.v0; + ret.v1 = {in0}.v1; + return ret;'''.format(**fmtspec) + elif from_typ == 'f16' and to_typ in ['i32', 'u32']: + sign = 'u' if to_typ[0] == 'u' else 's' + return '''nsimd_{simd_ext}_v{to_typ}x2 ret; + ret.v0 = vec_ct{sign}({in0}.v0, 0); + ret.v1 = vec_ct{sign}({in0}.v1, 0); + return ret;'''.format(sign=sign, **fmtspec) + elif from_typ == 'f32' and to_typ in ['f64', 'i64', 'u64']: + if simd_ext == 'vmx': + return '''nsimd_vmx_v{to_typ}x2 ret; + ret.v0.v0 = ({to_typ})vec_extract({in0}, 0); + ret.v0.v1 = ({to_typ})vec_extract({in0}, 1); + ret.v1.v0 = ({to_typ})vec_extract({in0}, 2); + ret.v1.v1 = ({to_typ})vec_extract({in0}, 3); + return ret;'''.format(**fmtspec) + else: + return \ + '''nsimd_vsx_v{to_typ}x2 ret; + ret.v0 = vec_splats(({to_typ})vec_extract({in0}, 0)); + ret.v0 = vec_insert(({to_typ})vec_extract({in0}, 1), ret.v0, 1); + ret.v1 = vec_splats(({to_typ})vec_extract({in0}, 2)); + ret.v1 = vec_insert(({to_typ})vec_extract({in0}, 3), ret.v1, 1); + return ret;'''.format(**fmtspec) + elif (from_typ in ['i16', 'u16'] and to_typ == 'f32') or \ + (from_typ in ['i32', 'u32'] and to_typ == 'f64'): + return '''nsimd_{simd_ext}_v{to_typ}x2 ret; + nsimd_{simd_ext}_v{sto_typ}x2 tmp; + tmp = nsimd_upcvt_{simd_ext}_{sto_typ}_{from_typ}({in0}); + ret.v0 = nsimd_cvt_{simd_ext}_{to_typ}_{sto_typ}(tmp.v0); + ret.v1 = nsimd_cvt_{simd_ext}_{to_typ}_{sto_typ}(tmp.v1); + return ret;'''.format(sto_typ=from_typ[0] + to_typ[1:], + **fmtspec) + elif from_typ in ['u8', 'u16']: + mask='(i{})0x{}'.format(to_typ[1:], 'F' * (int(from_typ[1:]) // 4)) + ppc_sto_typ = native_type('i' + to_typ[1:]) + ppc_sfrom_typ = '({})'.format(native_type('i' + from_typ[1:])) + ppc_to_typ = '({})'.format(native_type(to_typ)) \ + if to_typ in common.utypes else '' + return '''nsimd_{simd_ext}_v{to_typ}x2 ret; + {ppc_sto_typ} mask = vec_splats({mask}); + ret.v0 = {ppc_to_typ}vec_and( + vec_unpackh({ppc_sfrom_typ}{in0}), mask); + ret.v1 = {ppc_to_typ}vec_and( + vec_unpackl({ppc_sfrom_typ}{in0}), mask); + return ret;'''.format(mask=mask, ppc_sto_typ=ppc_sto_typ, + ppc_sfrom_typ=ppc_sfrom_typ, + ppc_to_typ=ppc_to_typ, **fmtspec) + elif from_typ in ['i8', 'i16']: + ppc_to_typ = '({})'.format(native_type(to_typ)) \ + if to_typ in common.utypes else '' + 
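
# Illustrative sketch (not nsimd code) of the mask construction above:
# widening u8/u16 goes through a sign-extending unpack, so a mask of
# from_typ-width ones (one 'F' per four bits) clears the extended sign
# bits afterwards.
def _demo_upcvt_mask(from_typ, to_typ):
    return '(i{})0x{}'.format(to_typ[1:], 'F' * (int(from_typ[1:]) // 4))

assert _demo_upcvt_mask('u8', 'u16') == '(i16)0xFF'
assert _demo_upcvt_mask('u16', 'u32') == '(i32)0xFFFF'
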
return '''nsimd_{simd_ext}_v{to_typ}x2 ret; + ret.v0 = {ppc_to_typ}vec_unpackh({in0}); + ret.v1 = {ppc_to_typ}vec_unpackl({in0}); + return ret;'''.format(ppc_to_typ=ppc_to_typ, **fmtspec) + elif from_typ in ['i32', 'u32']: + if simd_ext == 'vmx': + return '''nsimd_vmx_v{to_typ}x2 ret; + ret.v0.v0 = ({to_typ})vec_extract({in0}, 0); + ret.v0.v1 = ({to_typ})vec_extract({in0}, 1); + ret.v1.v0 = ({to_typ})vec_extract({in0}, 2); + ret.v1.v1 = ({to_typ})vec_extract({in0}, 3); + return ret;'''.format(**fmtspec) + else: + return \ + '''nsimd_vsx_v{to_typ}x2 ret; + ret.v0 = vec_splats(({to_typ})vec_extract({in0}, 0)); + ret.v0 = vec_insert(({to_typ})vec_extract({in0}, 1), ret.v0, 1); + ret.v1 = vec_splats(({to_typ})vec_extract({in0}, 2)); + ret.v1 = vec_insert(({to_typ})vec_extract({in0}, 3), ret.v1, 1); + return ret;'''.format(**fmtspec) + +# ----------------------------------------------------------------------------- + +def downcvt1(simd_ext, from_typ, to_typ): + if from_typ in ['f64', 'i64', 'u64']: + if simd_ext == 'vmx': + return '''nsimd_vmx_v{to_typ} ret; + ret = vec_splats(({to_typ}){in0}.v0); + ret = vec_insert(({to_typ}){in0}.v1, ret, 1); + ret = vec_insert(({to_typ}){in1}.v0, ret, 2); + ret = vec_insert(({to_typ}){in1}.v1, ret, 3); + return ret;'''.format(**fmtspec) + else: + return \ + '''nsimd_vsx_v{to_typ} ret; + ret = vec_splats(({to_typ})vec_extract({in0}, 0)); + ret = vec_insert(({to_typ})vec_extract({in0}, 1), ret, 1); + ret = vec_insert(({to_typ})vec_extract({in1}, 0), ret, 2); + ret = vec_insert(({to_typ})vec_extract({in1}, 1), ret, 3); + return ret;'''.format(**fmtspec) + elif from_typ in common.iutypes and to_typ in common.iutypes: + return 'return {cast}vec_pack({in0}, {in1});'. \ + format(cast='({})'.format(native_type(to_typ)) \ + if from_typ[0] != to_typ[0] else '', **fmtspec) + elif from_typ == 'f32' and to_typ == 'f16': + return '''nsimd_{simd_ext}_vf16 ret; + ret.v0 = {in0}; + ret.v1 = {in1}; + return ret;'''.format(**fmtspec) + elif from_typ == 'f32' and to_typ in common.iutypes: + return 'return vec_pack(vec_ct{s}({in0}, 0), vec_ct{s}({in1}, 0));'. \ + format(s='s' if to_typ == 'i16' else 'u', **fmtspec) + elif from_typ in common.iutypes and to_typ == 'f16': + return '''nsimd_{simd_ext}_vf16 ret; + ret.v0 = vec_ctf({in0}, 0); + ret.v1 = vec_ctf({in1}, 0); + return ret;'''.format(**fmtspec) + elif from_typ == 'f16': + return \ + '''return vec_pack(vec_pack(vec_ct{s}({in0}.v0, 0), + vec_ct{s}({in0}.v1, 0)), + vec_pack(vec_ct{s}({in1}.v0, 0), + vec_ct{s}({in1}.v1, 0)));'''. 
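
# Illustrative sketch (not nsimd code) of the vec_pack behaviour relied on
# below: it narrows each lane modulo 2**16 and concatenates both inputs,
# so integer downcvt truncates rather than saturates (vec_packs would
# saturate).
def _demo_pack_narrowing():
    a = [1, 2, 3, 0x12345]
    b = [5, 6, 7, 8]
    assert [v & 0xFFFF for v in a + b] == [1, 2, 3, 0x2345, 5, 6, 7, 8]
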
\ + format(s='s' if to_typ == 'i8' else 'u', + **fmtspec) + +# ----------------------------------------------------------------------------- + +def unzip(func, simd_ext, typ): + if typ == 'f16': + return '''nsimd_{simd_ext}_vf16 ret; + ret.v0 = nsimd_{func}_{simd_ext}_f32({in0}.v0, {in0}.v1); + ret.v1 = nsimd_{func}_{simd_ext}_f32({in1}.v0, {in1}.v1); + return ret;'''.format(func=func, **fmtspec) + elif typ in ['f64', 'i64', 'u64']: + if simd_ext == 'vmx': + return '''nsimd_vmx_v{typ} ret; + ret.v0 = {in0}.v{i}; + ret.v1 = {in1}.v{i}; + return ret;'''.format(i=0 if func == 'unziplo' else 1, + **fmtspec) + else: + return '''nsimd_vsx_v{typ} ret; + ret = vec_splats(vec_extract({in0}, {i})); + ret = vec_insert(vec_extract({in1}, {i}), ret, 1); + return ret;'''.format(i=0 if func == 'unziplo' else 1, + **fmtspec) + elif typ in ['i8', 'u8', 'i16', 'u16', 'i32', 'u32', 'f32']: + perm = [] + le = get_len(typ) + for i in range(le): + sz = int(typ[1:]) // 8 + for j in range(0, sz): + perm += ['(unsigned char)' + str(2 * sz * i + \ + (0 if func == 'unziplo' else sz) + j)] + return \ + '''__vector unsigned char perm = {{ {perm} }}; + return vec_perm({in0}, {in1}, perm);'''. \ + format(perm=', '.join(perm), **fmtspec) + +# ----------------------------------------------------------------------------- + +def zip(op, simd_ext, typ): + if typ == 'f16': + return '''nsimd_{simd_ext}_vf16 ret; + ret.v0 = vec_splats(vec_extract({in0}.v{i}, 0)); + ret.v0 = vec_insert(vec_extract({in0}.v{i}, 1), ret.v0, 2); + ret.v1 = vec_splats(vec_extract({in0}.v{i}, 2)); + ret.v1 = vec_insert(vec_extract({in0}.v{i}, 3), ret.v1, 2); + ret.v0 = vec_insert(vec_extract({in1}.v{i}, 0), ret.v0, 1); + ret.v0 = vec_insert(vec_extract({in1}.v{i}, 1), ret.v0, 3); + ret.v1 = vec_insert(vec_extract({in1}.v{i}, 2), ret.v1, 1); + ret.v1 = vec_insert(vec_extract({in1}.v{i}, 3), ret.v1, 3); + return ret;'''.format(i=0 if op == 'ziplo' else 1, + **fmtspec) + elif simd_ext == 'vmx' and typ in ['f64', 'i64', 'u64']: + return '''nsimd_{simd_ext}_v{typ} ret; + ret.v0 = {in0}.v{i}; + ret.v1 = {in1}.v{i}; + return ret;'''.format(i='1' if op == 'ziphi' else '0', + **fmtspec) + return 'return vec_merge{suf}({in0}, {in1});'. \ + format(suf='l' if op == 'ziphi' else 'h', **fmtspec) + +# ----------------------------------------------------------------------------- + +def zip_unzip_basic(op, simd_ext, typ): + return \ + '''nsimd_{simd_ext}_v{typ}x2 ret; + ret.v0 = nsimd_{pre}ziplo_{simd_ext}_{typ}({in0}, {in1}); + ret.v1 = nsimd_{pre}ziphi_{simd_ext}_{typ}({in0}, {in1}); + return ret;'''.format(pre='un' if op == 'unzip' else '', **fmtspec) + +# ----------------------------------------------------------------------------- + +def to_mask(simd_ext, typ): + if typ == 'f16': + return '''nsimd_{simd_ext}_vf16 ret; + ret.v0 = (__vector float){in0}.v0; + ret.v1 = (__vector float){in0}.v1; + return ret;'''.format(**fmtspec) + if simd_ext == 'vmx' and typ in ['f64', 'i64']: + return '''nsimd_{simd_ext}_v{typ} ret; + ret.v0 = nsimd_scalar_reinterpret_{typ}_u64({in0}.v0); + ret.v1 = nsimd_scalar_reinterpret_{typ}_u64({in0}.v1); + return ret;'''.format(**fmtspec) + elif simd_ext == 'vmx' and typ == 'u64': + return '''nsimd_{simd_ext}_vu64 ret; + ret.v0 = {in0}.v0; + ret.v1 = {in0}.v1; + return ret;'''.format(**fmtspec) + return 'return ({ppc_typ}){in0};'. 
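
# Illustrative sketch (not nsimd code) mirroring the perm loop above:
# unziplo keeps the even elements of the concatenated input pair and
# unziphi the odd ones, expressed as byte indices for vec_perm.
def _demo_unzip_perm(typnbits, lo):
    sz = typnbits // 8
    return [2 * sz * i + (0 if lo else sz) + j
            for i in range(16 // sz) for j in range(sz)]

assert _demo_unzip_perm(32, True) == [0, 1, 2, 3, 8, 9, 10, 11,
                                      16, 17, 18, 19, 24, 25, 26, 27]
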
\ + format(ppc_typ=native_type(typ), **fmtspec) + +# ----------------------------------------------------------------------------- + +def iota(simd_ext, typ): + if typ == 'f16': + return '''nsimd_{simd_ext}_vf16 ret; + ret.v0 = vec_splats(0.0f); + ret.v0 = vec_insert(1.0f, ret.v0, 1); + ret.v0 = vec_insert(2.0f, ret.v0, 2); + ret.v0 = vec_insert(3.0f, ret.v0, 3); + ret.v1 = vec_splats(4.0f); + ret.v1 = vec_insert(5.0f, ret.v1, 1); + ret.v1 = vec_insert(6.0f, ret.v1, 2); + ret.v1 = vec_insert(7.0f, ret.v1, 3); + return ret;'''.format(**fmtspec) + elif has_to_be_emulated(simd_ext, typ): + return '''nsimd_vmx_v{typ} ret; + ret.v0 = ({typ})0; + ret.v1 = ({typ})1; + return ret;'''.format(**fmtspec) + return 'nsimd_{simd_ext}_v{typ} ret;\n' \ + 'ret = vec_splats(({typ})0);\n'.format(**fmtspec) + \ + '\n'.join('ret = vec_insert(({}){}, ret, {});'.format(typ, i, i) \ + for i in range(1, get_len(typ))) + \ + '\nreturn ret;' + +# ----------------------------------------------------------------------------- + +def mask_for_loop_tail(simd_ext, typ): + le = get_len(typ) + if typ == 'f16': + threshold = 'nsimd_f32_to_f16((f32)({in1} - {in0}))'.format(**fmtspec) + else: + threshold = '({typ})({in1} - {in0})'.format(**fmtspec) + return '''if ({in0} >= {in1}) {{ + return nsimd_set1l_{simd_ext}_{typ}(0); + }} + if ({in1} - {in0} < {le}) {{ + nsimd_{simd_ext}_v{typ} n = + nsimd_set1_{simd_ext}_{typ}({threshold}); + return nsimd_lt_{simd_ext}_{typ}( + nsimd_iota_{simd_ext}_{typ}(), n); + }} else {{ + return nsimd_set1l_{simd_ext}_{typ}(1); + }}'''.format(le=le, threshold=threshold, **fmtspec) + +# ----------------------------------------------------------------------------- + +def scatter(simd_ext, typ): + le = get_len(typ) + if typ == 'f16': + return \ + '''{in0}[vec_extract({in1}, 0)] = nsimd_f32_to_f16( + vec_extract({in2}.v0, 0)); + {in0}[vec_extract({in1}, 1)] = nsimd_f32_to_f16( + vec_extract({in2}.v0, 1)); + {in0}[vec_extract({in1}, 2)] = nsimd_f32_to_f16( + vec_extract({in2}.v0, 2)); + {in0}[vec_extract({in1}, 3)] = nsimd_f32_to_f16( + vec_extract({in2}.v0, 3)); + {in0}[vec_extract({in1}, 4)] = nsimd_f32_to_f16( + vec_extract({in2}.v1, 0)); + {in0}[vec_extract({in1}, 5)] = nsimd_f32_to_f16( + vec_extract({in2}.v1, 1)); + {in0}[vec_extract({in1}, 6)] = nsimd_f32_to_f16( + vec_extract({in2}.v1, 2)); + {in0}[vec_extract({in1}, 7)] = nsimd_f32_to_f16( + vec_extract({in2}.v1, 3));'''. 
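
# Illustrative sketch (not nsimd code) of the tail-mask semantics above:
# lane k is on iff i0 + k < i1, which is what comparing iota against
# set1(i1 - i0) computes.
def _demo_loop_tail_mask(i0, i1, le):
    if i0 >= i1:
        return [False] * le
    return [k < i1 - i0 for k in range(le)]

assert _demo_loop_tail_mask(8, 11, 4) == [True, True, True, False]
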
\
+           format(**fmtspec)
+    elif has_to_be_emulated(simd_ext, typ):
+        return '''{in0}[{in1}.v0] = {in2}.v0;
+                  {in0}[{in1}.v1] = {in2}.v1;'''.format(**fmtspec)
+    return '\n'.join(['{in0}[vec_extract({in1}, {i})] = ' \
+                      'vec_extract({in2}, {i});'.format(i=i, **fmtspec) \
+                      for i in range(get_len(typ))])
+
+# -----------------------------------------------------------------------------
+
+def gather(simd_ext, typ):
+    if typ == 'f16':
+        return \
+        '''nsimd_{simd_ext}_v{typ} ret;
+           ret.v0 = vec_splats(nsimd_f16_to_f32({in0}[vec_extract({in1}, 0)]));
+           ret.v0 = vec_insert(nsimd_f16_to_f32({in0}[vec_extract({in1}, 1)]),
+                               ret.v0, 1);
+           ret.v0 = vec_insert(nsimd_f16_to_f32({in0}[vec_extract({in1}, 2)]),
+                               ret.v0, 2);
+           ret.v0 = vec_insert(nsimd_f16_to_f32({in0}[vec_extract({in1}, 3)]),
+                               ret.v0, 3);
+           ret.v1 = vec_splats(nsimd_f16_to_f32({in0}[vec_extract({in1}, 4)]));
+           ret.v1 = vec_insert(nsimd_f16_to_f32({in0}[vec_extract({in1}, 5)]),
+                               ret.v1, 1);
+           ret.v1 = vec_insert(nsimd_f16_to_f32({in0}[vec_extract({in1}, 6)]),
+                               ret.v1, 2);
+           ret.v1 = vec_insert(nsimd_f16_to_f32({in0}[vec_extract({in1}, 7)]),
+                               ret.v1, 3);
+           return ret;'''.format(**fmtspec)
+    elif has_to_be_emulated(simd_ext, typ):
+        return '''nsimd_{simd_ext}_v{typ} ret;
+                  ret.v0 = {in0}[{in1}.v0];
+                  ret.v1 = {in0}[{in1}.v1];
+                  return ret;'''.format(**fmtspec)
+    return '''nsimd_{simd_ext}_v{typ} ret;
+              ret = vec_splats({in0}[vec_extract({in1}, 0)]);
+              '''.format(**fmtspec) + \
+           '\n'.join('ret = vec_insert({in0}[vec_extract({in1}, {i})], ' \
+                     'ret, {i});'.format(i=i, **fmtspec) \
+                     for i in range(1, get_len(typ))) + '\n' + \
+           'return ret;'
+
+# -----------------------------------------------------------------------------
+
+def gather_linear(simd_ext, typ):
+    if typ == 'f16':
+        return \
+        '''nsimd_{simd_ext}_v{typ} ret;
+           ret.v0 = vec_splats(nsimd_f16_to_f32({in0}[0]));
+           ret.v0 = vec_insert(nsimd_f16_to_f32({in0}[{in1}]), ret.v0, 1);
+           ret.v0 = vec_insert(nsimd_f16_to_f32({in0}[2 * {in1}]), ret.v0, 2);
+           ret.v0 = vec_insert(nsimd_f16_to_f32({in0}[3 * {in1}]), ret.v0, 3);
+           ret.v1 = vec_splats(nsimd_f16_to_f32({in0}[4 * {in1}]));
+           ret.v1 = vec_insert(nsimd_f16_to_f32({in0}[5 * {in1}]), ret.v1, 1);
+           ret.v1 = vec_insert(nsimd_f16_to_f32({in0}[6 * {in1}]), ret.v1, 2);
+           ret.v1 = vec_insert(nsimd_f16_to_f32({in0}[7 * {in1}]), ret.v1, 3);
+           return ret;'''.format(**fmtspec)
+    elif has_to_be_emulated(simd_ext, typ):
+        return '''nsimd_{simd_ext}_v{typ} ret;
+                  ret.v0 = {in0}[0];
+                  ret.v1 = {in0}[{in1}];
+                  return ret;'''.format(**fmtspec)
+    return '''nsimd_{simd_ext}_v{typ} ret;
+              ret = vec_splats({in0}[0]);
+              '''.format(**fmtspec) + \
+           '\n'.join('ret = vec_insert({in0}[{in1} * {i}], ret, {i});'. \
+                     format(i=i, **fmtspec) for i in range(1, get_len(typ))) + \
+           '\nreturn ret;'
+
+# -----------------------------------------------------------------------------
+
+def scatter_linear(simd_ext, typ):
+    if typ == 'f16':
+        return \
+        '''{in0}[0] = nsimd_f32_to_f16(vec_extract({in2}.v0, 0));
+           {in0}[{in1}] = nsimd_f32_to_f16(vec_extract({in2}.v0, 1));
+           {in0}[2 * {in1}] = nsimd_f32_to_f16(vec_extract({in2}.v0, 2));
+           {in0}[3 * {in1}] = nsimd_f32_to_f16(vec_extract({in2}.v0, 3));
+           {in0}[4 * {in1}] = nsimd_f32_to_f16(vec_extract({in2}.v1, 0));
+           {in0}[5 * {in1}] = nsimd_f32_to_f16(vec_extract({in2}.v1, 1));
+           {in0}[6 * {in1}] = nsimd_f32_to_f16(vec_extract({in2}.v1, 2));
+           {in0}[7 * {in1}] = nsimd_f32_to_f16(vec_extract({in2}.v1, 3));'''.
\ + format(**fmtspec) + elif has_to_be_emulated(simd_ext, typ): + return '''{in0}[0] = {in2}.v0; + {in0}[{in1}] = {in2}.v1;'''.format(**fmtspec) + return '\n'.join(['{in0}[{in1} * {i}] = vec_extract({in2}, {i});'. \ + format(i=i, **fmtspec) for i in range(get_len(typ))]) + +# ----------------------------------------------------------------------------- + +def maskoz_load(oz, simd_ext, typ): + if typ == 'f16': + return \ + '''nsimd_{simd_ext}_vf16 ret; + ret.v0 = vec_splats(0.0f); + ret.v0 = vec_insert(vec_extract({in0}.v0, 0) ? + nsimd_f16_to_f32({in1}[0]) : {oz0}, ret.v0, 0); + ret.v0 = vec_insert(vec_extract({in0}.v0, 1) ? + nsimd_f16_to_f32({in1}[1]) : {oz1}, ret.v0, 1); + ret.v0 = vec_insert(vec_extract({in0}.v0, 2) ? + nsimd_f16_to_f32({in1}[2]) : {oz2}, ret.v0, 2); + ret.v0 = vec_insert(vec_extract({in0}.v0, 3) ? + nsimd_f16_to_f32({in1}[3]) : {oz3}, ret.v0, 3); + ret.v1 = ret.v0; + ret.v1 = vec_insert(vec_extract({in0}.v1, 0) ? + nsimd_f16_to_f32({in1}[4]) : {oz4}, ret.v1, 0); + ret.v1 = vec_insert(vec_extract({in0}.v1, 1) ? + nsimd_f16_to_f32({in1}[5]) : {oz5}, ret.v1, 1); + ret.v1 = vec_insert(vec_extract({in0}.v1, 2) ? + nsimd_f16_to_f32({in1}[6]) : {oz6}, ret.v1, 2); + ret.v1 = vec_insert(vec_extract({in0}.v1, 3) ? + nsimd_f16_to_f32({in1}[7]) : {oz7}, ret.v1, 3); + return ret;'''. \ + format(oz0='0.0f' if oz == 'z' else 'vec_extract({in2}.v0, 0)', + oz1='0.0f' if oz == 'z' else 'vec_extract({in2}.v0, 1)', + oz2='0.0f' if oz == 'z' else 'vec_extract({in2}.v0, 2)', + oz3='0.0f' if oz == 'z' else 'vec_extract({in2}.v0, 3)', + oz4='0.0f' if oz == 'z' else 'vec_extract({in2}.v1, 0)', + oz5='0.0f' if oz == 'z' else 'vec_extract({in2}.v1, 1)', + oz6='0.0f' if oz == 'z' else 'vec_extract({in2}.v1, 2)', + oz7='0.0f' if oz == 'z' else 'vec_extract({in2}.v1, 3)', + **fmtspec).format(**fmtspec) + elif has_to_be_emulated(simd_ext, typ): + if oz == 'z': + return '''nsimd_{simd_ext}_v{typ} ret; + ret.v0 = {in0}.v0 ? {in1}[0] : ({typ})0; + ret.v1 = {in0}.v1 ? {in1}[1] : ({typ})0; + return ret;'''.format(**fmtspec) + else: + return '''nsimd_{simd_ext}_v{typ} ret; + ret.v0 = {in0}.v0 ? {in1}[0] : {in2}.v0; + ret.v1 = {in0}.v1 ? {in1}[1] : {in2}.v1; + return ret;'''.format(**fmtspec) + return 'nsimd_{simd_ext}_v{typ} ret = {zeros};\n'.format(**fmtspec) + \ + '\n'.join( + '''if (vec_extract({in0}, {i})) {{ + ret = vec_insert({in1}[{i}], ret, {i}); + }} else {{ + ret = vec_insert({v}, ret, {i}); + }}'''.format(i=i, v='({})0'.format(typ) if oz == 'z' \ + else 'vec_extract({in2}, {i})'. 
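
# Illustrative sketch (not nsimd code) of the masked-load semantics above:
# the 'z' variants load zero into unset lanes while the 'o' variants keep
# the corresponding lane of the extra fallback vector.
def _demo_maskoz_load(mask, mem, other, oz):
    return [m if k else (0 if oz == 'z' else o)
            for k, m, o in zip(mask, mem, other)]

assert _demo_maskoz_load([1, 0, 1, 0], [10, 20, 30, 40],
                         [7, 7, 7, 7], 'o') == [10, 7, 30, 7]
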
\ + format(i=i, **fmtspec), **fmtspec) \ + for i in range(get_len(typ))) + \ + '\nreturn ret;' + +# ----------------------------------------------------------------------------- + +def mask_store(simd_ext, typ): + if typ == 'f16': + return \ + '''if (vec_extract({in0}.v0, 0)) {{ + {in1}[0] = nsimd_f32_to_f16(vec_extract({in2}.v0, 0)); + }} + if (vec_extract({in0}.v0, 1)) {{ + {in1}[1] = nsimd_f32_to_f16(vec_extract({in2}.v0, 1)); + }} + if (vec_extract({in0}.v0, 2)) {{ + {in1}[2] = nsimd_f32_to_f16(vec_extract({in2}.v0, 2)); + }} + if (vec_extract({in0}.v0, 3)) {{ + {in1}[3] = nsimd_f32_to_f16(vec_extract({in2}.v0, 3)); + }} + if (vec_extract({in0}.v1, 0)) {{ + {in1}[4] = nsimd_f32_to_f16(vec_extract({in2}.v1, 0)); + }} + if (vec_extract({in0}.v1, 1)) {{ + {in1}[5] = nsimd_f32_to_f16(vec_extract({in2}.v1, 1)); + }} + if (vec_extract({in0}.v1, 2)) {{ + {in1}[6] = nsimd_f32_to_f16(vec_extract({in2}.v1, 2)); + }} + if (vec_extract({in0}.v1, 3)) {{ + {in1}[7] = nsimd_f32_to_f16(vec_extract({in2}.v1, 3)); + }}'''.format(**fmtspec) + elif has_to_be_emulated(simd_ext, typ): + return '''if ({in0}.v0) {{ + {in1}[0] = {in2}.v0; + }} + if ({in0}.v1) {{ + {in1}[1] = {in2}.v1; + }}'''.format(**fmtspec) + return '\n'.join( + '''if (vec_extract({in0}, {i})) {{ + {in1}[{i}] = vec_extract({in2}, {i}); + }}'''.format(i=i, **fmtspec) for i in range(get_len(typ))) + +# ----------------------------------------------------------------------------- + +def to_logical(simd_ext, typ): + if typ == 'f16': + return emulate_f16('to_logical', simd_ext, ['l', 'v']) + elif has_to_be_emulated(simd_ext, typ): + if typ in ['i64', 'u64']: + return '''nsimd_{simd_ext}_vl{typ} ret; + ret.v0 = (u64)({in0}.v0 != ({typ})0 ? -1 : 0); + ret.v1 = (u64)({in0}.v1 != ({typ})0 ? -1 : 0); + return ret;'''.format(**fmtspec) + elif typ == 'f64': + return '''nsimd_{simd_ext}_vl{typ} ret; + ret.v0 = (u64)(nsimd_scalar_reinterpret_u64_f64( + {in0}.v0) != (u64)0 ? -1 : 0); + ret.v1 = (u64)(nsimd_scalar_reinterpret_u64_f64( + {in0}.v1) != (u64)0 ? -1 : 0); + return ret;'''.format(**fmtspec) + elif typ in common.iutypes: + return 'return nsimd_ne_{simd_ext}_{typ}({in0}, {zeros});'. \ + format(**fmtspec) + elif typ in ['f32', 'f64']: + return '''return nsimd_ne_{simd_ext}_u{typnbits}( + nsimd_reinterpret_{simd_ext}_u{typnbits}_{typ}( + {in0}), vec_splats((u{typnbits})0));'''. \ + format(**fmtspec) + +# ----------------------------------------------------------------------------- + +def get_impl(opts, func, simd_ext, from_typ, to_typ): + global fmtspec + + fmtspec = { + 'simd_ext': simd_ext, + 'typ': from_typ, + 'styp': get_type(opts, simd_ext, from_typ, to_typ), + 'from_typ': from_typ, + 'to_typ': to_typ, + 'in0': common.in0, + 'in1': common.in1, + 'in2': common.in2, + 'in3': common.in3, + 'in4': common.in4, + 'in5': common.in5, + 'zeros': 'vec_splats(({})0)'.format(from_typ), + 'lzeros': '({})vec_splats((u{})0)'. 
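
# Illustrative note (not nsimd code) on the f64 paths above: they compare
# bit patterns, not values, so a lane is true iff its reinterpreted u64 is
# non zero; -0.0 (sign bit set) therefore counts as true even though
# -0.0 == 0.0.
def _demo_to_logical_f64():
    import struct
    bits = struct.unpack('<Q', struct.pack('<d', -0.0))[0]
    assert bits == 0x8000000000000000 and -0.0 == 0.0
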
\ + format(native_typel(from_typ), from_typ[1:]) \ + if not has_to_be_emulated(simd_ext, from_typ) else '', + 'typnbits': from_typ[1:] + } + + impls = { + 'loada': 'load1234(simd_ext, from_typ, 1, True)', + 'load2a': 'load1234(simd_ext, from_typ, 2, True)', + 'load3a': 'load1234(simd_ext, from_typ, 3, True)', + 'load4a': 'load1234(simd_ext, from_typ, 4, True)', + 'loadu': 'load1234(simd_ext, from_typ, 1, False)', + 'load2u': 'load1234(simd_ext, from_typ, 2, False)', + 'load3u': 'load1234(simd_ext, from_typ, 3, False)', + 'load4u': 'load1234(simd_ext, from_typ, 4, False)', + 'storea': 'store1234(simd_ext, from_typ, 1, True)', + 'store2a': 'store1234(simd_ext, from_typ, 2, True)', + 'store3a': 'store1234(simd_ext, from_typ, 3, True)', + 'store4a': 'store1234(simd_ext, from_typ, 4, True)', + 'storeu': 'store1234(simd_ext, from_typ, 1, False)', + 'store2u': 'store1234(simd_ext, from_typ, 2, False)', + 'store3u': 'store1234(simd_ext, from_typ, 3, False)', + 'store4u': 'store1234(simd_ext, from_typ, 4, False)', + 'andb': 'binary_op2("andb", simd_ext, from_typ)', + 'xorb': 'binary_op2("xorb", simd_ext, from_typ)', + 'orb': 'binary_op2("orb", simd_ext, from_typ)', + 'andl': 'logical_op2("andl", simd_ext, from_typ)', + 'xorl': 'logical_op2("xorl", simd_ext, from_typ)', + 'orl': 'logical_op2("orl", simd_ext, from_typ)', + 'notb': 'not1(simd_ext, from_typ)', + 'notl': 'lnot1(simd_ext, from_typ)', + 'andnotb': 'binary_op2("andnotb", simd_ext, from_typ)', + 'andnotl': 'logical_op2("andnotl", simd_ext, from_typ)', + 'add': 'simple_op2("add", simd_ext, from_typ)', + 'adds': 'add_sub_s("adds",simd_ext, from_typ)', + 'sub': 'simple_op2("sub", simd_ext, from_typ)', + 'subs': 'add_sub_s("subs",simd_ext, from_typ)', + 'div': 'div2(simd_ext, from_typ)', + 'sqrt': 'sqrt1(simd_ext, from_typ)', + 'len': 'len1(simd_ext, from_typ)', + 'mul': 'simple_op2("mul", simd_ext, from_typ)', + 'shl': 'shift2("shl", simd_ext, from_typ)', + 'shr': 'shift2("shr", simd_ext, from_typ)', + 'shra': 'shift2("shra", simd_ext, from_typ)', + 'set1': 'set1(simd_ext, from_typ)', + 'set1l': 'lset1(simd_ext, from_typ)', + 'eq': 'cmp2("eq", simd_ext, from_typ)', + 'lt': 'cmp2("lt", simd_ext, from_typ)', + 'le': 'cmp2("le", simd_ext, from_typ)', + 'gt': 'cmp2("gt", simd_ext, from_typ)', + 'ge': 'cmp2("ge", simd_ext, from_typ)', + 'ne': 'cmp2("ne", simd_ext, from_typ)', + 'if_else1': 'if_else3(simd_ext, from_typ)', + 'min': 'minmax2("min", simd_ext, from_typ)', + 'max': 'minmax2("max", simd_ext, from_typ)', + 'loadla': 'loadl(True, simd_ext, from_typ)', + 'loadlu': 'loadl(False, simd_ext, from_typ)', + 'storela': 'storel(True, simd_ext, from_typ)', + 'storelu': 'storel(False, simd_ext, from_typ)', + 'abs': 'abs1(simd_ext, from_typ)', + 'fma': 'fma("fma", simd_ext, from_typ)', + 'fnma': 'fma("fnma", simd_ext, from_typ)', + 'fms': 'fma("fms", simd_ext, from_typ)', + 'fnms': 'fma("fnms", simd_ext, from_typ)', + 'ceil': 'round1("ceil", simd_ext, from_typ)', + 'floor': 'round1("floor", simd_ext, from_typ)', + 'trunc': 'round1("trunc", simd_ext, from_typ)', + 'round_to_even': 'round1("round_to_even", simd_ext, from_typ)', + 'all': 'allany1("all", simd_ext, from_typ)', + 'any': 'allany1("any", simd_ext, from_typ)', + 'reinterpret': 'reinterpret1(simd_ext, from_typ, to_typ)', + 'reinterpretl': 'reinterpretl1(simd_ext, from_typ, to_typ)', + 'cvt': 'convert1(simd_ext, from_typ, to_typ)', + 'rec8': 'recs1("rec8", simd_ext, from_typ)', + 'rec11': 'recs1("rec11", simd_ext, from_typ)', + 'rsqrt8': 'recs1("rsqrt8", simd_ext, from_typ)', + 'rsqrt11': 
'recs1("rsqrt11", simd_ext, from_typ)', + 'rec': 'recs1("rec", simd_ext, from_typ)', + 'neg': 'neg1(simd_ext, from_typ)', + 'nbtrue': 'nbtrue1(simd_ext, from_typ)', + 'reverse': 'reverse1(simd_ext, from_typ)', + 'addv': 'addv(simd_ext, from_typ)', + 'upcvt': 'upcvt1(simd_ext, from_typ, to_typ)', + 'downcvt': 'downcvt1(simd_ext, from_typ, to_typ)', + 'iota': 'iota(simd_ext, from_typ)', + 'to_logical': 'to_logical(simd_ext, from_typ)', + 'mask_for_loop_tail': 'mask_for_loop_tail(simd_ext, from_typ)', + 'masko_loadu1': 'maskoz_load("o", simd_ext, from_typ)', + 'maskz_loadu1': 'maskoz_load("z", simd_ext, from_typ)', + 'masko_loada1': 'maskoz_load("o", simd_ext, from_typ)', + 'maskz_loada1': 'maskoz_load("z", simd_ext, from_typ)', + 'mask_storea1': 'mask_store(simd_ext, from_typ)', + 'mask_storeu1': 'mask_store(simd_ext, from_typ)', + 'gather': 'gather(simd_ext, from_typ)', + 'scatter': 'scatter(simd_ext, from_typ)', + 'gather_linear': 'gather_linear(simd_ext, from_typ)', + 'scatter_linear': 'scatter_linear(simd_ext, from_typ)', + 'to_mask': 'to_mask(simd_ext, from_typ)', + 'ziplo': 'zip("ziplo", simd_ext, from_typ)', + 'ziphi': 'zip("ziphi", simd_ext, from_typ)', + 'zip': 'zip_unzip_basic("zip", simd_ext, from_typ)', + 'unzip': 'zip_unzip_basic("unzip", simd_ext, from_typ)', + 'unziplo': 'unzip("unziplo", simd_ext, from_typ)', + 'unziphi': 'unzip("unziphi", simd_ext, from_typ)' + } + if simd_ext not in get_simd_exts(): + raise ValueError('Unknown SIMD extension "{}"'.format(simd_ext)) + if not from_typ in common.types: + raise ValueError('Unknown type "{}"'.format(from_typ)) + if not func in impls: + return common.NOT_IMPLEMENTED + else: + return eval(impls[func]) diff --git a/egg/platform_x86.py b/egg/platform_x86.py index d462cde3..a84ccc5e 100644 --- a/egg/platform_x86.py +++ b/egg/platform_x86.py @@ -42,6 +42,20 @@ def get_simd_exts(): return ['sse2', 'sse42', 'avx', 'avx2', 'avx512_knl', 'avx512_skylake'] +def get_prev_simd_ext(simd_ext): + if simd_ext == 'sse2': + return 'cpu' + elif simd_ext == 'sse42': + return 'sse2' + elif simd_ext == 'avx': + return 'sse42' + elif simd_ext == 'avx2': + return 'avx' + elif simd_ext in avx512: + return 'avx2' + raise ValueError('Unknown SIMD extension "{}"'.format(simd_ext)) + + def emulate_fp16(simd_ext): if not simd_ext in get_simd_exts(): raise ValueError('Unknown SIMD extension "{}"'.format(simd_ext)) @@ -1630,14 +1644,12 @@ def gt2(simd_ext, typ): # _mm_sub_epi64({in1}, {in0}), 63));'''. \ # format(**fmtspec) return '''{typ} buf0[2], buf1[2]; - _mm_storeu_si128((__m128i*)buf0, {in0}); _mm_storeu_si128((__m128i*)buf1, {in1}); - buf0[0] = -(buf0[0] > buf1[0]); buf0[1] = -(buf0[1] > buf1[1]); - - return _mm_loadu_si128((__m128i*)buf0);'''.format(**fmtspec) + return _mm_loadu_si128((__m128i*)buf0);'''. 
\ + format(**fmtspec) return cmp2_with_add('gt', simd_ext, typ) if simd_ext in avx: if typ in ['f32', 'f64']: diff --git a/egg/scalar.py b/egg/scalar.py index 0ee024fe..1545a847 100644 --- a/egg/scalar.py +++ b/egg/scalar.py @@ -235,7 +235,8 @@ def reinterpret(totyp, typ): '''#if defined(NSIMD_ARM_FP16) && defined(NSIMD_IS_GCC) {via_union} #elif (defined(NSIMD_ARM_FP16) && !defined(NSIMD_IS_GCC)) || \ - defined(NSIMD_CUDA) || defined(NSIMD_ROCM) + defined(NSIMD_CUDA) || defined(NSIMD_ROCM) || \ + defined(NSIMD_ONEAPI) {via_memcpy} #else {emulated} @@ -254,7 +255,8 @@ def reinterpret(totyp, typ): '''#if defined(NSIMD_ARM_FP16) && defined(NSIMD_IS_GCC) {via_union} #elif (defined(NSIMD_ARM_FP16) && !defined(NSIMD_IS_GCC)) || \ - defined(NSIMD_CUDA) || defined(NSIMD_ROCM) + defined(NSIMD_CUDA) || defined(NSIMD_ROCM) || \ + defined(NSIMD_ONEAPI) {via_memcpy} #else {emulated} @@ -429,6 +431,21 @@ def get_impl(operator, totyp, typ): format(typ2=typ2, neg=neg, op=op, **fmtspec), typ) f = 'f' if typ in ['f16', 'f32'] else '' typ2 = 'f32' if typ == 'f16' else typ + if operator.src: + if typ == 'f16': + return \ + '''return nsimd_f32_to_f16( + nsimd_sleef_{op_name}_scalar_f32({vas}));'''. \ + format(op_name=operator.name, + vas=', '.join(['nsimd_f16_to_f32({})'. \ + format(common.get_arg(i)) \ + for i in range(len(operator.params[1:]))]), + **fmtspec) + else: + return 'return nsimd_sleef_{op_name}_scalar_{typ}({vas});'. \ + format(op_name=operator.name, + vas=common.get_args(len(operator.params[1:])), + **fmtspec) func = { 'orb': lambda: opbit('{in0} | {in1}', typ), 'andb': lambda: opbit('{in0} & {in1}', typ), diff --git a/egg/ulp.json b/egg/ulp.json deleted file mode 100644 index fa8bfb93..00000000 --- a/egg/ulp.json +++ /dev/null @@ -1,654 +0,0 @@ -{ "ulps": [ - -{ - "func":"acosh", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"true", "Value causing NaN":"0xfbff", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"acosh", "type":"f32", - "ulps" : "22", "Worst value": "0x3f8ad853", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"acosh", "type":"f64", - "ulps" : "52", "Worst value": "0x0", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"true", "Value causing NaN":"0xdf9731a2e63cf8af", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"acos", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"acos", "type":"f32", - "ulps" : "16", "Worst value": "0x3f7fffb5", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"acos", "type":"f64", - "ulps" : "43", "Worst value": "0x3feffffe2a3ffffc", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"asinh", "type":"f16", - "ulps" : "10", "Worst value": "0x1001", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", 
- "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"asinh", "type":"f32", - "ulps" : "21", "Worst value": "0x3f003ff7", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"asinh", "type":"f64", - "ulps" : "51", "Worst value": "0xbfdd5d85b03abb0b", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"asin", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"asin", "type":"f32", - "ulps" : "22", "Worst value": "0x3f00020f", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"asin", "type":"f64", - "ulps" : "52", "Worst value": "0xbfeaed8e3c35db1c", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"atanh", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"atanh", "type":"f32", - "ulps" : "22", "Worst value": "0x3ed01ed1", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"atanh", "type":"f64", - "ulps" : "51", "Worst value": "0x3fd9842f6433085f", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"atan", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"atan", "type":"f32", - "ulps" : "22", "Worst value": "0x3ed41685", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"atan", "type":"f64", - "ulps" : "51", "Worst value": "0x3fda8299ac350533", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"cosh", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"cosh", "type":"f32", - "ulps" : "22", "Worst value": "0x42b0138f", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf 
Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"cosh", "type":"f64", - "ulps" : "52", "Worst value": "0x3e50260f9982ec06", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"cos", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"cos", "type":"f32", - "ulps" : "22", "Worst value": "0x6f6b459f", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"cos", "type":"f64", - "ulps" : "51", "Worst value": "0xc0eb0dc64de64ff0", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"coth", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"coth", "type":"f32", - "ulps" : "20", "Worst value": "0x3f1cd666", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"coth", "type":"f64", - "ulps" : "51", "Worst value": "0x3fbbd042e037a086", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"cot", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"cot", "type":"f32", - "ulps" : "22", "Worst value": "0x6e67ff2e", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"cot", "type":"f64", - "ulps" : "51", "Worst value": "0x5dc04a55ed8cbfd5", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"csch", "type":"f16", - "ulps" : "10", "Worst value": "0x18ce", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"csch", "type":"f32", - "ulps" : "22", "Worst value": "0x42aff7ed", - "ulps for denormalized output" : "-1", "Worst value for dnz output" : "0x42d15445", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"csch", "type":"f64", - "ulps" : "51", "Worst value": "0xc01ab4c6bf35698d", - "ulps for denormalized output" : "-1", "Worst value for dnz output" : "0x40874508687b5836", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf 
error":"0x0" -}, -{ - "func":"csc", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"csc", "type":"f32", - "ulps" : "22", "Worst value": "0x42a38075", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"csc", "type":"f64", - "ulps" : "51", "Worst value": "0x40de4b164c382f46", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"erfc", "type":"f16", - "ulps" : "10", "Worst value": "0x1717", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"erfc", "type":"f32", - "ulps" : "17", "Worst value": "0x41122765", - "ulps for denormalized output" : "-1", "Worst value for dnz output" : "0x4120dde8", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"erfc", "type":"f64", - "ulps" : "47", "Worst value": "0x0", - "ulps for denormalized output" : "-1", "Worst value for dnz output" : "0x403aff26abc28405", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"true", "Value causing Inf error":"0xdf9731a2e63cf8af" -}, -{ - "func":"erfcx", "type":"f16", - "ulps" : "0", "Worst value": "0x0", - "ulps for denormalized output" : "0", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"erfcx", "type":"f32", - "ulps" : "0", "Worst value": "0x4bc4454d", - "ulps for denormalized output" : "0", "Worst value for dnz output" : "0x4120dde8", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"erfcx", "type":"f64", - "ulps" : "0", "Worst value": "0x0", - "ulps for denormalized output" : "-1", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"true", "Value causing Inf error":"0x0" -}, -{ - "func":"erf", "type":"f16", - "ulps" : "10", "Worst value": "0x1b17", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"erf", "type":"f32", - "ulps" : "22", "Worst value": "0x3ef9bd28", - "ulps for denormalized output" : "15", "Worst value for dnz output" : "0x8000b3e7", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"erf", "type":"f64", - "ulps" : "48", "Worst value": "0x0", - "ulps for denormalized output" : "45", "Worst value for dnz output" : "0x207a22867655", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"true", "Value causing Inf error":"0xdf9731a2e63cf8af" -}, -{ - "func":"exp10", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"exp10", 
"type":"f32", - "ulps" : "23", "Worst value": "0x33a6c4e1", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"true", "Value causing Inf error":"0x421a207e" -}, -{ - "func":"exp10", "type":"f64", - "ulps" : "51", "Worst value": "0x401175df8162ebbf", - "ulps for denormalized output" : "-1", "Worst value for dnz output" : "0xc073cbcb82d72a6d", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"exp2", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"exp2", "type":"f32", - "ulps" : "23", "Worst value": "0x373cfe07", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"true", "Value causing Inf error":"0x42ffff81" -}, -{ - "func":"exp2", "type":"f64", - "ulps" : "52", "Worst value": "0x3cc14f77f5157932", - "ulps for denormalized output" : "-1", "Worst value for dnz output" : "0xc090b20cca325451", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"true", "Value causing Inf error":"0x408ffa284d82d0c0" -}, -{ - "func":"expm1", "type":"f16", - "ulps" : "10", "Worst value": "0xcbf9", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"expm1", "type":"f32", - "ulps" : "23", "Worst value": "0x3ecf9934", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"true", "Value causing Inf error":"0x42b17187" -}, -{ - "func":"expm1", "type":"f64", - "ulps" : "52", "Worst value": "0xc042208b98a309a4", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"true", "Value causing Inf error":"0x40862cb68ae2a314" -}, -{ - "func":"exp", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"exp", "type":"f32", - "ulps" : "23", "Worst value": "0x38b69df7", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"true", "Value causing Inf error":"0x42b17187" -}, -{ - "func":"exp", "type":"f64", - "ulps" : "52", "Worst value": "0x3d734d7ffd58558c", - "ulps for denormalized output" : "-1", "Worst value for dnz output" : "0xc086d796cd41ce50", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"true", "Value causing Inf error":"0x40862cb68ae2a314" -}, -{ - "func":"gamma", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"true", "Value causing Inf error":"0x83ff" -}, -{ - "func":"gamma", "type":"f32", - "ulps" : "20", "Worst value": "0x0", - "ulps for denormalized output" : "0", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"true", "Value 
causing Inf error":"0x7f7fffef" -}, -{ - "func":"gamma", "type":"f64", - "ulps" : "48", "Worst value": "0x0", - "ulps for denormalized output" : "0", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"true", "Value causing Inf error":"0x0" -}, - -{ - "func":"log10", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"log10", "type":"f32", - "ulps" : "23", "Worst value": "0x322bc8d8", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"log10", "type":"f64", - "ulps" : "51", "Worst value": "0x3ff558043e2ab008", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"log1p", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"log1p", "type":"f32", - "ulps" : "23", "Worst value": "0x40cc7469", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"log1p", "type":"f64", - "ulps" : "52", "Worst value": "0xbfbe14b2503c2965", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"log2", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"log2", "type":"f32", - "ulps" : "22", "Worst value": "0x3f403ada", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"log2", "type":"f64", - "ulps" : "52", "Worst value": "0x3ff306fec4260dfe", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"log", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"log", "type":"f32", - "ulps" : "23", "Worst value": "0x3ebc5a05", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"log", "type":"f64", - "ulps" : "52", "Worst value": "0x4005bf0b172b7e16", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"sech", "type":"f16", 
- "ulps" : "10", "Worst value": "0x3cfb", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"sech", "type":"f32", - "ulps" : "22", "Worst value": "0x42aff7ed", - "ulps for denormalized output" : "-1", "Worst value for dnz output" : "0x42d15445", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"sech", "type":"f64", - "ulps" : "51", "Worst value": "0xbffc66fe3438cdfc", - "ulps for denormalized output" : "-1", "Worst value for dnz output" : "0x40874508687b5836", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"sec", "type":"f16", - "ulps" : "10", "Worst value": "0x3c48", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"sec", "type":"f32", - "ulps" : "22", "Worst value": "0x433ac901", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"sec", "type":"f64", - "ulps" : "51", "Worst value": "0x40ca67da6f121256", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"sinh", "type":"f16", - "ulps" : "10", "Worst value": "0x32c7", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"sinh", "type":"f32", - "ulps" : "22", "Worst value": "0x42b0138f", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"sinh", "type":"f64", - "ulps" : "51", "Worst value": "0x3ff62e6de02c5cdc", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"sin", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"sin", "type":"f32", - "ulps" : "22", "Worst value": "0x60f81e17", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"sin", "type":"f64", - "ulps" : "51", "Worst value": "0x40e54ac30bee13b6", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"sqrt", "type":"f16", - "ulps" : "10", "Worst value": "0x0", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"sqrt", "type":"f32", - "ulps" : "23", "Worst value": "0x0", - "ulps 
for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"sqrt", "type":"f64", - "ulps" : "52", "Worst value": "0x401000a2e6200146", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"tanh", "type":"f16", - "ulps" : "10", "Worst value": "0x2c95", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"tanh", "type":"f32", - "ulps" : "23", "Worst value": "0x3e00adb0", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"tanh", "type":"f64", - "ulps" : "52", "Worst value": "0x3fe1940bba232817", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"tan", "type":"f16", - "ulps" : "10", "Worst value": "0x607b", - "ulps for denormalized output" : "10", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"tan", "type":"f32", - "ulps" : "22", "Worst value": "0x788cd3ba", - "ulps for denormalized output" : "23", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}, -{ - "func":"tan", "type":"f64", - "ulps" : "51", "Worst value": "0x5044dad5254e9888", - "ulps for denormalized output" : "52", "Worst value for dnz output" : "0x0", - "NaN Error":"false", "Value causing NaN":"0x0", - "Inf Error":"false", "Value causing Inf error":"0x0" -}]} diff --git a/include/nsimd/c_adv_api.h b/include/nsimd/c_adv_api.h new file mode 100644 index 00000000..b09d2044 --- /dev/null +++ b/include/nsimd/c_adv_api.h @@ -0,0 +1,74 @@ +/* + +Copyright (c) 2021 Agenium Scale + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
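The hex strings in the "Worst value" fields of the accuracy table above are raw IEEE-754 bit patterns of the inputs that produced the worst measured error. A minimal decoding sketch (the helper name f64_from_bits is illustrative, not part of this patch):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Reinterpret a "Worst value" bit pattern from the table as a double.
static double f64_from_bits(uint64_t u) {
  double d;
  std::memcpy(&d, &u, sizeof d); // bit-exact type punning
  return d;
}

int main() {
  // log10/f64 reports its worst input as 0x3ff558043e2ab008 (about 1.334)
  std::printf("%.17g\n", f64_from_bits(0x3ff558043e2ab008ULL));
  return 0;
}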
+ +*/ + +#ifndef NSIMD_C_ADV_API_H +#define NSIMD_C_ADV_API_H + +#include + +#if NSIMD_C >= 2011 + +NSIMD_INLINE void nsimd_c11_type_unsupported(void) {} + +/* ------------------------------------------------------------------------- */ + +#include + +/* ------------------------------------------------------------------------- */ +/* We add by hand parametrized loads/stores. */ + +/* loads */ + +#define nsimd_load_aligned(type, ptr) nsimd_loada(type, ptr) +#define nsimd_load_unaligned(type, ptr) nsimd_loadu(type, ptr) + +#define nsimd_load(alignment, type, ptr) \ + NSIMD_PP_CAT_2(nsimd_load_, alignment)(type, ptr) + +/* stores */ + +#define nsimd_store_aligned(ptr, vec) nsimd_storea(ptr, vec) +#define nsimd_store_unaligned(ptr, vec) nsimd_storeu(ptr, vec) + +#define nsimd_store(alignment, ptr, vec) \ + NSIMD_PP_CAT_2(nsimd_store_, alignment)(ptr, vec) + +/* ------------------------------------------------------------------------- */ +/* Generic types */ + +#define nsimd_pack(type) NSIMD_PP_CAT_2(nsimd_pack_, type) +#define nsimd_packl(type) NSIMD_PP_CAT_2(nsimd_packl_, type) +#define nsimd_packx2(type) NSIMD_PP_CAT_2(nsimd_packx2_, type) +#define nsimd_packx3(type) NSIMD_PP_CAT_2(nsimd_packx3_, type) +#define nsimd_packx4(type) NSIMD_PP_CAT_2(nsimd_packx4_, type) + +#define nsimd_pack_a(type, simd_ext) NSIMD_PP_CAT_3(nsimd_pack_, type) +#define nsimd_packl_a(type, simd_ext) NSIMD_PP_CAT_3(nsimd_packl_, type) +#define nsimd_packx2_a(type, simd_ext) NSIMD_PP_CAT_3(nsimd_packx2_, type) +#define nsimd_packx3_a(type, simd_ext) NSIMD_PP_CAT_3(nsimd_packx3_, type) +#define nsimd_packx4_a(type, simd_ext) NSIMD_PP_CAT_3(nsimd_packx4_, type) + +#endif /* NSIMD_C >= 2011 */ + +#endif /* NSIMD_C_ADV_API_H */ diff --git a/include/nsimd/cxx_adv_api.hpp b/include/nsimd/cxx_adv_api.hpp index 32d70e00..242837ca 100644 --- a/include/nsimd/cxx_adv_api.hpp +++ b/include/nsimd/cxx_adv_api.hpp @@ -1,6 +1,6 @@ /* -Copyright (c) 2019 Agenium Scale +Copyright (c) 2021 Agenium Scale Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -69,13 +69,25 @@ NSIMD_STRUCT pack { // Underlying native SIMD vector getter simd_vector native_register() const { return car; } + // Arithmetic and assignment operators + pack &operator+=(pack const &other); + pack &operator-=(pack const &other); + pack &operator*=(pack const &other); + pack &operator/=(pack const &other); + pack &operator|=(pack const &other); + pack &operator&=(pack const &other); + pack &operator^=(pack const &other); + pack &operator<<=(int); + pack &operator>>=(int); + + // For std::cout'ing a pack friend std::ostream &operator<<(std::ostream &os, pack const &a0) { T buf[max_len_t::value]; storeu(buf, a0.car, T(), SimdExt()); os << "{ "; int n = len(a0); for (int i = 0; i < n; i++) { - os << buf[i]; + os << to_biggest(buf[i]); if (i < n - 1) { os << ", "; } @@ -104,6 +116,18 @@ NSIMD_STRUCT pack { car = set1(T(s), T(), SimdExt()); } + // Arithmetic and assignment operators + pack &operator+=(pack const &other); + pack &operator-=(pack const &other); + pack &operator*=(pack const &other); + pack &operator/=(pack const &other); + pack &operator|=(pack const &other); + pack &operator&=(pack const &other); + pack &operator^=(pack const &other); + pack &operator<<=(int); + pack &operator>>=(int); + + // For std::cout'ing a pack friend std::ostream &operator<<(std::ostream &os, pack const &a0) { os << pack(a0.car) << ", " << a0.cdr; return os; @@ -151,6 +175,7 @@
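The parametrized load/store macros in c_adv_api.h above dispatch purely by token pasting, so the alignment tag must be a plain identifier (aligned or unaligned). A self-contained sketch of the same mechanism (my_load, load_a and load_u are illustrative stand-ins, not NSIMD symbols):

#include <cstdio>

// Stand-ins for the generated nsimd_loada / nsimd_loadu entry points.
static int load_a(const int *p) { return *p; } // "aligned" load
static int load_u(const int *p) { return *p; } // "unaligned" load

#define MY_PP_CAT_2(a, b) a##b
#define my_load_aligned(ptr) load_a(ptr)
#define my_load_unaligned(ptr) load_u(ptr)
// Same pattern as nsimd_load: paste the tag onto the my_load_ prefix.
#define my_load(alignment, ptr) MY_PP_CAT_2(my_load_, alignment)(ptr)

int main() {
  int x = 42;
  // my_load(aligned, &x) -> my_load_aligned(&x) -> load_a(&x)
  std::printf("%d\n", my_load(aligned, &x));
  return 0;
}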
NSIMD_STRUCT packl { // Underlying native SIMD vector getter simd_vectorl native_register() const { return car; } + // For std::cout'ing a packl friend std::ostream &operator<<(std::ostream &os, packl const &a0) { T buf[max_len_t::value]; storelu(buf, a0.car, T(), SimdExt()); @@ -184,6 +209,7 @@ NSIMD_STRUCT packl { car = set1l(int(s), T(), SimdExt()); } + // For std::cout'ing a packl friend std::ostream &operator<<(std::ostream &os, packl const &a0) { os << packl(a0.car) << ", " << a0.cdr; return os; @@ -559,6 +585,149 @@ int nbtrue(packl const &a0) { namespace nsimd { +// ---------------------------------------------------------------------------- +// Arithmetic and assignment operators + +// add +template +pack &pack:: +operator+=(pack const &other) { + this->car = add(this->car, other.car, T()); + return *this; +} + +template +pack &pack:: +operator+=(pack const &other) { + this->car = add(this->car, other.car, T()); + this->cdr += other.cdr; + return *this; +} + +// sub +template +pack &pack:: +operator-=(pack const &other) { + this->car = sub(this->car, other.car, T()); + return *this; +} + +template +pack &pack:: +operator-=(pack const &other) { + this->car = sub(this->car, other.car, T()); + this->cdr -= other.cdr; + return *this; +} + +// mul +template +pack &pack:: +operator*=(pack const &other) { + this->car = mul(this->car, other.car, T()); + return *this; +} + +template +pack &pack:: +operator*=(pack const &other) { + this->car = mul(this->car, other.car, T()); + this->cdr *= other.cdr; + return *this; +} + +// div +template +pack &pack:: +operator/=(pack const &other) { + this->car = div(this->car, other.car, T()); + return *this; +} + +template +pack &pack:: +operator/=(pack const &other) { + this->car = div(this->car, other.car, T()); + this->cdr /= other.cdr; + return *this; +} + +// orb +template +pack &pack:: +operator|=(pack const &other) { + this->car = orb(this->car, other.car, T()); + return *this; +} + +template +pack &pack:: +operator|=(pack const &other) { + this->car = orb(this->car, other.car, T()); + this->cdr |= other.cdr; + return *this; +} + +// andb +template +pack &pack:: +operator&=(pack const &other) { + this->car = andb(this->car, other.car, T()); + return *this; +} + +template +pack &pack:: +operator&=(pack const &other) { + this->car = andb(this->car, other.car, T()); + this->cdr &= other.cdr; + return *this; +} + +// xorb +template +pack &pack:: +operator^=(pack const &other) { + this->car = xorb(this->car, other.car, T()); + return *this; +} + +template +pack &pack:: +operator^=(pack const &other) { + this->car = xorb(this->car, other.car, T()); + this->cdr ^= other.cdr; + return *this; +} + +// left shift +template +pack &pack::operator<<=(int s) { + this->car = shl(this->car, s, T()); + return *this; +} + +template +pack &pack::operator<<=(int s) { + this->car = shl(this->car, s, T()); + this->cdr <<= s; + return *this; +} + +// right shift +template +pack &pack::operator>>=(int s) { + this->car = shr(this->car, s, T()); + return *this; +} + +template +pack &pack::operator>>=(int s) { + this->car = shr(this->car, s, T()); + this->cdr >>= s; + return *this; +} + // ---------------------------------------------------------------------------- // The if_else function cannot be auto-generated diff --git a/include/nsimd/cxx_adv_api_aliases.hpp b/include/nsimd/cxx_adv_api_aliases.hpp new file mode 100644 index 00000000..3befe252 --- /dev/null +++ b/include/nsimd/cxx_adv_api_aliases.hpp @@ -0,0 +1,59 @@ +/* + +Copyright (c) 2021 Agenium Scale + 
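A minimal usage sketch of the compound assignment operators implemented above (assuming a generated NSIMD build; the 64-element buffer is just a safe upper bound for the widest supported extension):

#include <nsimd/nsimd-all.hpp>
#include <iostream>

int main() {
  i32 buf[64];
  for (int i = 0; i < 64; i++) {
    buf[i] = (i32)i;
  }
  nsimd::pack<i32> v = nsimd::loadu<nsimd::pack<i32> >(buf);
  v += v;  // dispatches to nsimd::add through operator+= above
  v <<= 1; // dispatches to nsimd::shl through operator<<= above
  std::cout << v << std::endl; // elements printed through to_biggest
  return 0;
}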
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +*/ + +#ifndef NSIMD_CXX_ADV_API_ALIASES_HPP +#define NSIMD_CXX_ADV_API_ALIASES_HPP + +#include + +namespace nsimd { + +/* ------------------------------------------------------------------------- */ + +template +pack fabs(pack const &a0) { + return abs(a0); +} + +/* ------------------------------------------------------------------------- */ + +template +pack fmin(pack const &a0, + pack const &a1) { + return min(a0, a1); +} + +/* ------------------------------------------------------------------------- */ + +template +pack fmax(pack const &a0, + pack const &a1) { + return max(a0, a1); +} + +/* ------------------------------------------------------------------------- */ + +} // namespace nsimd + +#endif diff --git a/include/nsimd/modules/fixed_point/fixed_math.hpp b/include/nsimd/modules/fixed_point/fixed_math.hpp index 961a23f3..e24e72a2 100644 --- a/include/nsimd/modules/fixed_point/fixed_math.hpp +++ b/include/nsimd/modules/fixed_point/fixed_math.hpp @@ -66,7 +66,7 @@ SOFTWARE. 
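The aliases in cxx_adv_api_aliases.hpp above only forward to the existing nsimd::abs, nsimd::min and nsimd::max overloads so that the usual <cmath> spellings also work on packs; a hedged sketch:

#include <nsimd/nsimd-all.hpp>

// Clamp |x| to hi, component-wise, written with the <cmath>-style names.
template <typename T>
nsimd::pack<T> clamp_abs(nsimd::pack<T> const &x, nsimd::pack<T> const &hi) {
  return nsimd::fmin(nsimd::fabs(x), hi); // same as min(abs(x), hi)
}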
// #include "nsimd/modules/fixed_point/function/round.hpp" // Trigonometric functions -// #include "nsimd/modules/fixed_point/function/sin.hpp" +#include "nsimd/modules/fixed_point/function/sin.hpp" // #include "nsimd/modules/fixed_point/function/cos.hpp" // #include "nsimd/modules/fixed_point/function/tan.hpp" // #include "nsimd/modules/fixed_point/function/csc.hpp" diff --git a/include/nsimd/modules/memory_management.hpp b/include/nsimd/modules/memory_management.hpp index 0c127fb7..57ede593 100644 --- a/include/nsimd/modules/memory_management.hpp +++ b/include/nsimd/modules/memory_management.hpp @@ -1,6 +1,6 @@ /* -Copyright (c) 2020 Agenium Scale +Copyright (c) 2021 Agenium Scale Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -138,6 +138,67 @@ template void copy_to_host(T *host_ptr, T *device_ptr, size_t sz) { (size_t)sz); \ } +// ---------------------------------------------------------------------------- +// oneAPI + +#elif defined(NSIMD_ONEAPI) + +template T *device_malloc(const size_t sz) { + return sycl::malloc_device(sz, nsimd::oneapi::default_queue()); +} + +template T *device_calloc(const size_t sz) { + sycl::queue q = nsimd::oneapi::default_queue(); + T *const ret = sycl::malloc_device(sz, q); + if (ret == NULL) { + return NULL; + } + q.memset((void *)ret, 0, sz * sizeof(T)).wait_and_throw(); + return ret; +} + +template void device_free(T *const ptr) { + sycl::queue q = nsimd::oneapi::default_queue(); + sycl::free(ptr, q); +} + +template +void copy_to_device(T *const device_ptr, const T *const host_ptr, + const size_t sz) { + sycl::queue q = nsimd::oneapi::default_queue(); + q.memcpy((void *)device_ptr, (const void *)host_ptr, sz * sizeof(T)) + .wait_and_throw(); +} + +template +void copy_to_host(T *const host_ptr, const T *const device_ptr, size_t sz) { + sycl::queue q = nsimd::oneapi::default_queue(); + q.memcpy((void *)host_ptr, (const void *)device_ptr, sz * sizeof(T)) + .wait_and_throw(); +} + +#define nsimd_fill_dev_mem_func(func_name, expr) \ + template \ + void kernel_##func_name##_(T *const ptr, const size_t sz, \ + sycl::nd_item<1> item) { \ + const size_t i = item.get_global_id().get(0); \ + if (i < sz) { \ + ptr[i] = nsimd::to(expr); \ + } \ + } \ + \ + template void func_name(T *const ptr, const size_t sz) { \ + const size_t total_num_threads = \ + nsimd::compute_total_num_threads(sz, THREADS_PER_BLOCK); \ + sycl::queue q = nsimd::oneapi::default_queue(); \ + q.parallel_for(sycl::nd_range<1>(sycl::range<1>(total_num_threads), \ + sycl::range<1>(THREADS_PER_BLOCK)), \ + [=](sycl::nd_item<1> item) { \ + kernel_##func_name##_(ptr, sz, item); \ + }) \ + .wait_and_throw(); \ + } + // ---------------------------------------------------------------------------- // CPU @@ -184,7 +245,7 @@ struct paired_pointers_t { template paired_pointers_t pair_malloc(size_t sz) { paired_pointers_t ret; ret.sz = 0; -#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) +#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) || defined(NSIMD_ONEAPI) ret.device_ptr = device_malloc(sz); if (ret.device_ptr == NULL) { ret.host_ptr = NULL; @@ -217,7 +278,7 @@ template paired_pointers_t pair_malloc_or_exit(size_t sz) { template paired_pointers_t pair_calloc(size_t sz) { paired_pointers_t ret; ret.sz = 0; -#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) +#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) || defined(NSIMD_ONEAPI) ret.device_ptr = device_calloc(sz); if (ret.device_ptr == NULL) { 
ret.host_ptr = NULL; @@ -248,7 +309,7 @@ template paired_pointers_t pair_calloc_or_exit(size_t sz) { } template void pair_free(paired_pointers_t p) { -#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) +#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) || defined(NSIMD_ONEAPI) device_free(p.device_ptr); free((void *)p.host_ptr); #else @@ -257,7 +318,7 @@ template void pair_free(paired_pointers_t p) { } template void copy_to_device(paired_pointers_t p) { -#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) +#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) || defined(NSIMD_ONEAPI) copy_to_device(p.device_ptr, p.host_ptr, p.sz); #else (void)p; @@ -265,7 +326,7 @@ template void copy_to_device(paired_pointers_t p) { } template void copy_to_host(paired_pointers_t p) { -#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) +#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) || defined(NSIMD_ONEAPI) copy_to_host(p.host_ptr, p.device_ptr, p.sz); #else (void)p; diff --git a/include/nsimd/modules/spmd.hpp b/include/nsimd/modules/spmd.hpp index 68c40e26..10eb3d93 100644 --- a/include/nsimd/modules/spmd.hpp +++ b/include/nsimd/modules/spmd.hpp @@ -54,9 +54,9 @@ namespace spmd { #endif // ---------------------------------------------------------------------------- -// CUDA and ROCm +// GPUs: CUDA, ROCm or oneAPI -#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) +#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) || defined(NSIMD_ONEAPI) #if defined(NSIMD_CUDA) @@ -73,7 +73,7 @@ namespace spmd { int spmd_i_ = threadIdx.x + blockIdx.x * blockDim.x; \ if (spmd_i_ < n) { -#else +#elif defined(NSIMD_ROCM) // 1d kernel definition #define spmd_kernel_1d(name, ...) \ @@ -89,12 +89,30 @@ namespace spmd { size_t spmd_i_ = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; \ if (spmd_i_ < n) { +#else + +// 1d kernel definition +#define spmd_kernel_1d(name, ...) \ + template \ + inline void name(__VA_ARGS__, const size_t n, sycl::nd_item<1> item) { \ + size_t spmd_i_ = item.get_global_id().get(0); \ + if (spmd_i_ < n) { + +// templated kernel definition +#define spmd_tmpl_kernel_1d(name, template_argument, ...) \ + template \ + inline void name(__VA_ARGS__, const size_t n, sycl::nd_item<1> item) { \ + size_t spmd_i_ = item.get_global_id().get(0); \ + if (spmd_i_ < n) { + #endif #define spmd_kernel_end \ } \ } +#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) + // device function #define spmd_dev_func(type_name, ...) \ template __device__ type_name(__VA_ARGS__) { @@ -104,6 +122,19 @@ namespace spmd { template \ __device__ type_name(__VA_ARGS__) { +#else + +// device function +#define spmd_dev_func(type_name, ...) \ + template type_name(__VA_ARGS__) { + +// templated device function +#define spmd_tmpl_dev_func(type_name, template_argument, ...) \ + template \ + type_name(__VA_ARGS__) { + +#endif + #define spmd_dev_func_end } // call spmd_dev_function @@ -119,18 +150,33 @@ namespace spmd { #define spmd_launch_kernel_1d(name, spmd_scalar_bits_, threads_per_block, n, \ ...) \ name \ - <<<(unsigned int)((n + threads_per_block - 1) / threads_per_block), \ + <<<(unsigned int)nsimd_kernel_param(n, threads_per_block), \ (unsigned int)(threads_per_block)>>>(__VA_ARGS__, (int)n) -#else +#elif defined(NSIMD_ROCM) // launch 1d kernel ROCm #define spmd_launch_kernel_1d(name, spmd_scalar_bits_, threads_per_block, n, \ ...)
\ - hipLaunchKernelGGL( \ - (name), \ - (size_t)((n + threads_per_block - 1) / threads_per_block), \ - (size_t)(threads_per_block), 0, NULL, __VA_ARGS__, (size_t)n) + hipLaunchKernelGGL((name), \ + (size_t)nsimd_kernel_param(n, threads_per_block), \ + (size_t)(threads_per_block), 0, NULL, __VA_ARGS__, \ + (size_t)n) + +#else + +// launch 1d kernel oneAPI +#define spmd_launch_kernel_1d(name, spmd_scalar_bits_, threads_per_block, n, \ + ...) \ + size_t total_num_threads = \ + (size_t)nsimd_kernel_param(n, threads_per_block); \ + sycl::queue q = nsimd::oneapi::default_queue(); \ + q.parallel_for(sycl::nd_range<1>(sycl::range<1>(total_num_threads), \ + sycl::range<1>(threads_per_block)), \ + [=](sycl::nd_item<1> item) { \ + name(__VA_ARGS__, (size_t)n, item); \ + }) \ + .wait_and_throw(); #endif @@ -182,8 +228,13 @@ template <> struct type_t<64> { #define k_unmasked_load(base_addr) k_load(base_addr) // f32 <--> f16 conversions +#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) #define k_f32_to_f16(a) __float2half(a) #define k_f16_to_f32(a) __half2float(a) +#else +#define k_f32_to_f16(a) f16(a) +#define k_f16_to_f32(a) static_cast(a) +#endif // assignment statement #define k_set(var, value) \ diff --git a/include/nsimd/modules/tet1d.hpp b/include/nsimd/modules/tet1d.hpp index fb173dae..1bd3c568 100644 --- a/include/nsimd/modules/tet1d.hpp +++ b/include/nsimd/modules/tet1d.hpp @@ -1,6 +1,6 @@ /* -Copyright (c) 2019 Agenium Scale +Copyright (c) 2021 Agenium Scale Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -106,6 +106,31 @@ __global__ void gpu_kernel_component_wise_mask(T *dst, Mask const mask, } } +#elif defined(NSIMD_ONEAPI) + +// oneAPI component wise kernel +template +void oneapi_kernel_component_wise(T *dst, Expr const expr, + nsimd::nat n, sycl::nd_item<1> item) { + const int i = static_cast(item.get_global_id().get(0)); + if (i < n) { + dst[i] = expr.gpu_get(i); + } +} + +// oneAPI component wise kernel with masked output +template +void oneapi_kernel_component_wise_mask(T *dst, Mask const mask, + Expr const expr, + nsimd::nat n, + sycl::nd_item<1> item) { + + nsimd::nat i = static_cast(item.get_global_id().get(0)); + if (i < n && mask.gpu_get(i)) { + dst[i] = expr.gpu_get(i); + } +} + #else // CPU component wise kernel @@ -199,6 +224,8 @@ template struct node { #if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) __device__ T gpu_get(nsimd::nat) const { return value; } +#elif defined(NSIMD_ONEAPI) + T gpu_get(nsimd::nat) const { return value; } #else T scalar_get(nsimd::nat) const { return value; } template @@ -227,7 +254,6 @@ template struct to_node_t { template struct to_node_t > { typedef node type; - static type impl(type node) { return node; } }; @@ -263,6 +289,8 @@ template struct node { #if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) __device__ T gpu_get(nsimd::nat i) const { return data[i]; } +#elif defined(NSIMD_ONEAPI) + T gpu_get(nsimd::nat i) const { return data[i]; } #else T scalar_get(nsimd::nat i) const { return data[i]; } template @@ -313,25 +341,35 @@ struct node { template node operator=(node const &expr) { -#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) +#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) || defined(NSIMD_ONEAPI) nsimd::nat expr_size = compute_size(mask.size(), expr.size()); nsimd::nat nt = threads_per_block < 0 ? 
128 : threads_per_block; - nsimd::nat nb = (expr_size + nt - 1) / nt; // div rounded up + nsimd::nat param = nsimd_kernel_param(expr_size, nt); assert(nt > 0 && nt <= UINT_MAX); - assert(nb > 0 && nb <= UINT_MAX); + assert(param > 0 && param <= UINT_MAX); #if defined(NSIMD_CUDA) cudaStream_t s = (stream == NULL ? NULL : *(cudaStream_t *)stream); // clang-format off - gpu_kernel_component_wise_mask<<<(unsigned int)(nb), (unsigned int)(nt), + gpu_kernel_component_wise_mask<<<(unsigned int)(param), (unsigned int)(nt), 0, s>>> (data, mask, expr, expr_size); // clang-format on #elif defined(NSIMD_ROCM) hipStream_t s = stream == NULL ? NULL : *(hipStream_t *)stream; - hipLaunchKernelGGL(gpu_kernel_component_wise_mask, (unsigned int)(nb), + hipLaunchKernelGGL(gpu_kernel_component_wise_mask, (unsigned int)(param), (unsigned int)(nt), 0, s, data, mask, expr, expr_size); +#else + sycl::queue q = nsimd::oneapi::default_queue(); + q.parallel_for(sycl::nd_range<1>(sycl::range<1>((size_t)param), + sycl::range<1>((size_t)nt)), + [=, *this](sycl::nd_item<1> item) { + oneapi_kernel_component_wise_mask(data, mask, expr, + expr_size, item); + }) + .wait_and_throw(); + #endif #else cpu_kernel_component_wise_mask( @@ -376,16 +414,16 @@ template struct node { template node operator=(node const &expr) { -#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) +#if defined(NSIMD_CUDA) || defined(NSIMD_ROCM) || defined(NSIMD_ONEAPI) nsimd::nat nt = threads_per_block < 0 ? 128 : threads_per_block; - nsimd::nat nb = (expr.size() + nt - 1) / nt; // div rounded up + nsimd::nat param = nsimd_kernel_param(expr.size(), nt); assert(nt > 0 && nt <= UINT_MAX); - assert(nb > 0 && nb <= UINT_MAX); + assert(param > 0 && param <= UINT_MAX); #if defined(NSIMD_CUDA) cudaStream_t s = stream == NULL ? NULL : *(cudaStream_t *)stream; // clang-format off - gpu_kernel_component_wise<<<(unsigned int)(nb), (unsigned int)(nt), + gpu_kernel_component_wise<<<(unsigned int)(param), (unsigned int)(nt), 0, s>>>(data, expr, expr.size()); // clang-format on @@ -393,7 +431,17 @@ template struct node { hipStream_t s = stream == NULL ? NULL : *(hipStream_t *)stream; hipLaunchKernelGGL( (gpu_kernel_component_wise >), - (unsigned int)(nb), (unsigned int)(nt), 0, s, data, expr, expr.size()); + (unsigned int)(param), (unsigned int)(nt), 0, s, data, expr, + expr.size()); +#else + sycl::queue q = nsimd::oneapi::default_queue(); + q.parallel_for( + sycl::nd_range<1>(sycl::range<1>((size_t)param), + sycl::range<1>((size_t)nt)), + [=, *this](sycl::nd_item<1> item) { + oneapi_kernel_component_wise(data, expr, expr.size(), item); + }) + .wait_and_throw(); #endif #else cpu_kernel_component_wise(data, expr, expr.size()); diff --git a/src/ulps.cpp b/include/nsimd/nsimd-all.h similarity index 52% rename from src/ulps.cpp rename to include/nsimd/nsimd-all.h index 9383495f..90922c8f 100644 --- a/src/ulps.cpp +++ b/include/nsimd/nsimd-all.h @@ -1,6 +1,6 @@ /* -Copyright (c) 2019 Agenium Scale +Copyright (c) 2021 Agenium Scale Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,37 +22,10 @@ SOFTWARE. 
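For context, a hedged sketch of what the tet1d operator= overloads above evaluate: tet1d::out and tet1d::in build the expression tree, and the assignment runs the component-wise kernel through the CUDA, ROCm, oneAPI or CPU path selected at compile time:

#include <nsimd/modules/tet1d.hpp>

// z[i] = x[i] + y[i] for n contiguous elements (sketch, not from the patch).
void add_arrays(f32 *z, const f32 *x, const f32 *y, nsimd::nat n) {
  tet1d::out(z) = tet1d::in(x, n) + tet1d::in(y, n);
}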
*/ -#define NSIMD_INSIDE -#include - -// ---------------------------------------------------------------------------- - -static int nsimd_upper_log2(u64 a) { - int l = 0; - for (; ((u64)1 << l) < a; l++); - return l; -} - -// ---------------------------------------------------------------------------- +#ifndef NSIMD_ALL_H +#define NSIMD_ALL_H -extern "C" { - -NSIMD_DLLEXPORT int nsimd_diff_in_logulps_f16(f16 a, f16 b) { - int d = nsimd_scalar_reinterpret_i16_f16(a) - - nsimd_scalar_reinterpret_i16_f16(b); - return nsimd_upper_log2((u64)(d >= 0 ? d : -d)); -} - -NSIMD_DLLEXPORT int nsimd_diff_in_logulps_f32(f32 a, f32 b) { - int d = nsimd_scalar_reinterpret_i32_f32(a) - - nsimd_scalar_reinterpret_i32_f32(b); - return nsimd_upper_log2((u64)(d >= 0 ? d : -d)); -} - -NSIMD_DLLEXPORT int nsimd_diff_in_logulps_f64(f64 a, f64 b) { - i64 d = nsimd_scalar_reinterpret_i64_f64(a) - - nsimd_scalar_reinterpret_i64_f64(b); - return nsimd_upper_log2((u64)(d >= 0 ? d : -d)); -} +#include +#include -} // extern "C" +#endif diff --git a/include/nsimd/nsimd-all.hpp b/include/nsimd/nsimd-all.hpp index f2868aa9..6be10957 100644 --- a/include/nsimd/nsimd-all.hpp +++ b/include/nsimd/nsimd-all.hpp @@ -1,6 +1,6 @@ /* -Copyright (c) 2019 Agenium Scale +Copyright (c) 2021 Agenium Scale Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -27,6 +27,7 @@ SOFTWARE. #include #include +#include #include #endif diff --git a/include/nsimd/nsimd.h b/include/nsimd/nsimd.h index 23f9550a..36c6dd1a 100644 --- a/include/nsimd/nsimd.h +++ b/include/nsimd/nsimd.h @@ -1,6 +1,6 @@ /* -Copyright (c) 2020 Agenium Scale +Copyright (c) 2021 Agenium Scale Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -33,6 +33,8 @@ SOFTWARE. /* Detect host compiler */ #if defined(_MSC_VER) #define NSIMD_IS_MSVC +#elif defined(__ibmxl_version__) + #define NSIMD_IS_XLC #elif defined(__FCC_version__) #define NSIMD_IS_FCC #elif defined(__INTEL_COMPILER) @@ -104,6 +106,7 @@ SOFTWARE. 
#if NSIMD_CXX >= 2020 #include + #include #endif /* ------------------------------------------------------------------------- */ @@ -333,12 +336,12 @@ namespace nsimd { /* PPC */ -#if (defined(POWER8) || defined(ALTIVEC)) && !defined(NSIMD_POWER8) -#define NSIMD_POWER8 +#if (defined(VMX) || defined(ALTIVEC)) && !defined(NSIMD_VMX) +#define NSIMD_VMX #endif -#if defined(POWER7) && !defined(NSIMD_POWER7) -#define NSIMD_POWER7 +#if defined(VSX) && !defined(NSIMD_VSX) +#define NSIMD_VSX #endif /* CUDA */ @@ -357,6 +360,11 @@ namespace nsimd { #if defined(ONEAPI) && !defined(NSIMD_ONEAPI) #define NSIMD_ONEAPI + /* undef ONEAPI is needed because ONEAPI is used as a namespace in DPC++: + sycl::ONEAPI */ + #ifdef ONEAPI + #undef ONEAPI + #endif #endif /* ------------------------------------------------------------------------- */ @@ -406,7 +414,7 @@ namespace nsimd { struct cpu {}; struct sse2 {}; struct sse42 {}; - #if NSIMD_CXX >= 2020 + #if NSIMD_CXX >= 2020 template concept simd_ext_c = std::is_same_v || std::is_same_v || @@ -696,14 +704,51 @@ namespace nsimd { } // namespace nsimd #endif -#elif defined(NSIMD_POWER7) +#elif defined(NSIMD_VMX) + + #define NSIMD_PLATFORM ppc + #define NSIMD_SIMD vmx + + #ifdef NSIMD_IS_CLANG + /* New versions of clang spam useless warnings coming from their */ + /* altivec.h file */ + #pragma clang diagnostic ignored "-Wc11-extensions" + #pragma clang diagnostic ignored "-Wc++11-long-long" + #endif + + #include + + #ifdef bool + #undef bool + #endif + #ifdef pixel + #undef pixel + #endif + #ifdef vector + #undef vector + #endif + + #if NSIMD_CXX > 0 + namespace nsimd { + struct cpu {}; + struct vmx {}; + #if NSIMD_CXX >= 2020 + template + concept simd_ext_c = std::is_same_v || + std::is_same_v; + #define NSIMD_LIST_SIMD_EXT cpu, vmx + #endif + } // namespace nsimd + #endif + +#elif defined(NSIMD_VSX) #define NSIMD_PLATFORM ppc - #define NSIMD_SIMD power7 + #define NSIMD_SIMD vsx #ifdef NSIMD_IS_CLANG - // New version of clang are spamming useless warning comming from their - // altivec.h file + /* New versions of clang spam useless warnings coming from their */ + /* altivec.h file */ #pragma clang diagnostic ignored "-Wc11-extensions" #pragma clang diagnostic ignored "-Wc++11-long-long" #endif @@ -723,12 +768,14 @@ namespace nsimd { #if NSIMD_CXX > 0 namespace nsimd { struct cpu {}; - struct power7 {}; + struct vmx {}; + struct vsx {}; #if NSIMD_CXX >= 2020 template concept simd_ext_c = std::is_same_v || - std::is_same_v; - #define NSIMD_LIST_SIMD_EXT cpu, power7 + std::is_same_v || + std::is_same_v; + #define NSIMD_LIST_SIMD_EXT cpu, vmx, vsx #endif } // namespace nsimd #endif @@ -756,8 +803,24 @@ namespace nsimd { #include #endif - #ifdef NSIMD_ONEAPI + #if defined(NSIMD_ONEAPI) && NSIMD_CXX > 0 #include + + extern "C" { + + NSIMD_DLLSPEC void *nsimd_oneapi_default_queue(); + + } // extern "C" + + namespace nsimd { + namespace oneapi { + + NSIMD_INLINE sycl::queue &default_queue() { + return *(sycl::queue *)nsimd_oneapi_default_queue(); + } + + } // namespace oneapi + } // namespace nsimd #endif #define NSIMD_SIMD cpu @@ -804,7 +867,16 @@ namespace nsimd { #include #endif -#ifdef NSIMD_IS_MSVC +#if defined(NSIMD_ONEAPI) + typedef sycl::cl_char i8; + typedef sycl::cl_uchar u8; + typedef sycl::cl_short i16; + typedef sycl::cl_ushort u16; + typedef sycl::cl_int i32; + typedef sycl::cl_uint u32; + typedef sycl::cl_long i64; + typedef sycl::cl_ulong u64; +#elif defined(NSIMD_IS_MSVC) typedef unsigned __int8 u8; typedef signed __int8 i8; typedef unsigned
__int16 u16; @@ -836,7 +908,10 @@ namespace nsimd { typedef signed int i32; #endif #endif - #if NSIMD_WORD_SIZE == 64 + #if defined(NSIMD_VMX) || defined(NSIMD_VSX) + typedef nsimd_ulonglong u64; + typedef nsimd_longlong i64; + #elif NSIMD_WORD_SIZE == 64 #ifdef __UINT64_TYPE__ typedef nsimd_uint64_type u64; #else @@ -905,14 +980,19 @@ namespace nsimd { typedef __half f16; #define NSIMD_NATIVE_FP16 #elif defined(NSIMD_ONEAPI) - typedef half f16; + typedef sycl::half f16; #define NSIMD_NATIVE_FP16 #else typedef struct { u16 u; } f16; #endif -typedef float f32; -typedef double f64; +#if defined(NSIMD_ONEAPI) + typedef sycl::cl_float f32; + typedef sycl::cl_double f64; +#else + typedef float f32; + typedef double f64; +#endif /* ------------------------------------------------------------------------- */ /* Native register size (for now only 32 and 64 bits) types */ @@ -1259,6 +1339,8 @@ using simd_vectorl = typename simd_traits::simd_vectorl; #define NSIMD_MAX_ALIGNMENT 16 #endif +/* TODO: provide C++14 alignment constexpr */ + /* clang-format on */ #define NSIMD_NB_REGISTERS NSIMD_PP_CAT_3(NSIMD_, NSIMD_SIMD, _NB_REGISTERS) @@ -1359,8 +1441,13 @@ NSIMD_DLLSPEC void nsimd_aligned_free(void *); #if NSIMD_CXX > 0 namespace nsimd { -NSIMD_DLLSPEC void *aligned_alloc(nsimd_nat); -NSIMD_DLLSPEC void aligned_free(void *); +NSIMD_INLINE void *aligned_alloc(nsimd_nat n) { + return nsimd_aligned_alloc(n); +} + +NSIMD_INLINE void aligned_free(void *ptr) { + nsimd_aligned_free(ptr); +} template T *aligned_alloc_for(nsimd_nat n) { return (T *)aligned_alloc(n * (nsimd_nat)sizeof(T)); @@ -1516,7 +1603,7 @@ inline f16 nsimd_f32_to_f16(f32 a) { } inline f32 nsimd_f16_to_f32(f16 a) { return nsimd_u16_to_f32(*(u16 *)&a); } #elif defined(NSIMD_ONEAPI) -inline f16 nsimd_f32_to_f16(f32 a) { return static_cast(a); } +inline f16 nsimd_f32_to_f16(f32 a) { return static_cast(a); } inline f32 nsimd_f16_to_f32(f16 a) { return static_cast(a); } #else NSIMD_DLLSPEC f16 nsimd_f32_to_f16(f32); @@ -1544,6 +1631,39 @@ NSIMD_DLLSPEC f32 f16_to_f32(f16); } // namespace nsimd #endif +/* ------------------------------------------------------------------------- */ +/* Helpers to print scalar values, converting to the biggest type */ + +NSIMD_INLINE u64 nsimd_to_biggest_u8(u8 a) { return (u64)a; } +NSIMD_INLINE u64 nsimd_to_biggest_u16(u16 a) { return (u64)a; } +NSIMD_INLINE u64 nsimd_to_biggest_u32(u32 a) { return (u64)a; } +NSIMD_INLINE u64 nsimd_to_biggest_u64(u64 a) { return a; } +NSIMD_INLINE i64 nsimd_to_biggest_i8(i8 a) { return (i64)a; } +NSIMD_INLINE i64 nsimd_to_biggest_i16(i16 a) { return (i64)a; } +NSIMD_INLINE i64 nsimd_to_biggest_i32(i32 a) { return (i64)a; } +NSIMD_INLINE i64 nsimd_to_biggest_i64(i64 a) { return a; } +NSIMD_INLINE f64 nsimd_to_biggest_f16(f16 a) { + return (f64)nsimd_f16_to_f32(a); +} +NSIMD_INLINE f64 nsimd_to_biggest_f32(f32 a) { return (f64)a; } +NSIMD_INLINE f64 nsimd_to_biggest_f64(f64 a) { return a; } + +#if NSIMD_CXX > 0 +namespace nsimd { +NSIMD_INLINE u64 to_biggest(u8 a) { return nsimd_to_biggest_u8(a); } +NSIMD_INLINE u64 to_biggest(u16 a) { return nsimd_to_biggest_u16(a); } +NSIMD_INLINE u64 to_biggest(u32 a) { return nsimd_to_biggest_u32(a); } +NSIMD_INLINE u64 to_biggest(u64 a) { return nsimd_to_biggest_u64(a); } +NSIMD_INLINE i64 to_biggest(i8 a) { return nsimd_to_biggest_i8(a); } +NSIMD_INLINE i64 to_biggest(i16 a) { return nsimd_to_biggest_i16(a); } +NSIMD_INLINE i64 to_biggest(i32 a) { return nsimd_to_biggest_i32(a); } +NSIMD_INLINE i64 to_biggest(i64 a) { return nsimd_to_biggest_i64(a);
} +NSIMD_INLINE f64 to_biggest(f16 a) { return nsimd_to_biggest_f16(a); } +NSIMD_INLINE f64 to_biggest(f32 a) { return nsimd_to_biggest_f32(a); } +NSIMD_INLINE f64 to_biggest(f64 a) { return nsimd_to_biggest_f64(a); } +} // namespace nsimd +#endif + /* ------------------------------------------------------------------------- */ /* General conversion for C++ */ @@ -1599,6 +1719,14 @@ T to(S value) { so we silence the warning manually for now. */ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wuninitialized" +#elif defined(NSIMD_IS_GCC) && NSIMD_CXX > 0 && \ + (defined(NSIMD_VMX) || defined(NSIMD_VSX)) + /* When compiling POWERPC intrinsics inside C++ code with GCC we get tons of + -Wunused-but-set-parameter. This is a GCC bug. For now we silence the + warnings here. */ + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wunused-but-set-parameter" + #pragma GCC diagnostic ignored "-Wunused-but-set-variable" #endif #include @@ -1609,6 +1737,9 @@ T to(S value) { #pragma clang diagnostic pop #elif defined(NSIMD_IS_GCC) && defined(NSIMD_SVE_FAMILY) #pragma GCC diagnostic pop +#elif defined(NSIMD_IS_GCC) && NSIMD_CXX > 0 && \ + (defined(NSIMD_VMX) || defined(NSIMD_VSX)) + #pragma GCC diagnostic pop #endif /* clang-format on */ @@ -2086,15 +2217,15 @@ NSIMD_INLINE int isnormal(f64 a) { return nsimd_isnormal_f64(a); } #endif /* ------------------------------------------------------------------------- */ -/* Difference in log ulps, returns an int. */ +/* Difference in log UFP, returns an int, see the documentation for more info */ #if NSIMD_CXX > 0 extern "C" { #endif -NSIMD_DLLSPEC int nsimd_diff_in_logulps_f16(f16, f16); -NSIMD_DLLSPEC int nsimd_diff_in_logulps_f32(f32, f32); -NSIMD_DLLSPEC int nsimd_diff_in_logulps_f64(f64, f64); +NSIMD_DLLSPEC int nsimd_ufp_f16(f16, f16); +NSIMD_DLLSPEC int nsimd_ufp_f32(f32, f32); +NSIMD_DLLSPEC int nsimd_ufp_f64(f64, f64); #if NSIMD_CXX > 0 } // extern "C" @@ -2102,18 +2233,23 @@ NSIMD_DLLSPEC int nsimd_diff_in_logulps_f64(f64, f64); #if NSIMD_CXX > 0 namespace nsimd { -NSIMD_INLINE int diff_in_logulps(f16 a, f16 b) { - return nsimd_diff_in_logulps_f16(a, b); -} +NSIMD_INLINE int ufp(f16 a, f16 b) { return nsimd_ufp_f16(a, b); } +NSIMD_INLINE int ufp(f32 a, f32 b) { return nsimd_ufp_f32(a, b); } +NSIMD_INLINE int ufp(f64 a, f64 b) { return nsimd_ufp_f64(a, b); } +} // namespace nsimd +#endif -NSIMD_INLINE int diff_in_logulps(f32 a, f32 b) { - return nsimd_diff_in_logulps_f32(a, b); -} +/* ------------------------------------------------------------------------- */ +/* Get last kernel parameter */ -NSIMD_INLINE int diff_in_logulps(f64 a, f64 b) { - return nsimd_diff_in_logulps_f64(a, b); -} -} // namespace nsimd +#if NSIMD_CXX > 0 +extern "C" { +#endif + +NSIMD_DLLSPEC nsimd_nat nsimd_kernel_param(nsimd_nat, nsimd_nat); + +#if NSIMD_CXX > 0 +} // extern "C" #endif /* ------------------------------------------------------------------------- */ diff --git a/scripts/aarch64-linux-gnu-clang++.sh b/scripts/aarch64-linux-gnu-clang++.sh new file mode 100755 index 00000000..69f65017 --- /dev/null +++ b/scripts/aarch64-linux-gnu-clang++.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +clang++ --target=aarch64-linux-gnu "$@" diff --git a/scripts/aarch64-linux-gnu-clang.sh b/scripts/aarch64-linux-gnu-clang.sh new file mode 100755 index 00000000..517f6f7f --- /dev/null +++ b/scripts/aarch64-linux-gnu-clang.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +clang --target=aarch64-linux-gnu "$@" diff --git a/scripts/build-tests.sh b/scripts/build-tests.sh index 4c186af7..56be74e5
100644 --- a/scripts/build-tests.sh +++ b/scripts/build-tests.sh @@ -1,5 +1,5 @@ -#!/bin/sh -# Copyright (c) 2020 Agenium Scale +#!/bin/bash +# Copyright (c) 2021 Agenium Scale # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/scripts/build.sh b/scripts/build.sh index ac862b56..0a97899c 100644 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -1,5 +1,5 @@ -#!/bin/sh -# Copyright (c) 2020 Agenium Scale +#!/bin/bash +# Copyright (c) 2021 Agenium Scale # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -29,14 +29,14 @@ set -e # Init SETUP_SH="${PWD}/setup.sh" -NSCONFIG="${PWD}/../nstools/bin/nsconfig" +NSCONFIG="${PWD}/../nstools/nsconfig/nsconfig" HATCH_PY="${PWD}/../egg/hatch.py" BUILD_ROOT="${PWD}/.." ############################################################################### # Run setup -sh "${SETUP_SH}" +bash "${SETUP_SH}" ############################################################################### # Generate NSIMD diff --git a/scripts/ci-scale.txt b/scripts/ci-scale.txt index aa5b23ea..1da226b5 100644 --- a/scripts/ci-scale.txt +++ b/scripts/ci-scale.txt @@ -1,4 +1,4 @@ -camelot.numscale.com {/home/gquintin} +camelot.hpc.scale {/home/gquintin} - mkdir cmake-build-sse2 - cd cmake-build-sse2 - cmake .. -Dsimd=sse2 @@ -15,7 +15,7 @@ camelot.numscale.com {/home/gquintin} - cd ../build-sse42-gcc - ../nstools/bin/nstest -j80 -glastonbury.numscale.com {/home/gquintin} +glastonbury.hpc.scale {/home/gquintin} - source /etc/profile.d/modules.sh - module load cmake/3.1.0 - mkdir cmake-build-avx512_skylake @@ -27,7 +27,7 @@ glastonbury.numscale.com {/home/gquintin} - cd build-avx512_skylake-gcc - ../nstools/bin/nstest -j40 -carduel.numscale.com {/home/gquintin} +carduel.hpc.scale {/home/gquintin} - source /etc/profile.d/profile.sh - module load cmake/3.1.0 - mkdir cmake-build-avx512_knl @@ -39,7 +39,7 @@ carduel.numscale.com {/home/gquintin} - cd build-avx512_knl-gcc - ../nstools/bin/nstest -j80 -gaunes.numscale.com {/home/gquintin} +gaunes.hpc.scale {/home/gquintin} - mkdir cmake-build-avx - cd cmake-build-avx - cmake .. -Dsimd=avx @@ -67,7 +67,7 @@ gaunes.numscale.com {/home/gquintin} - ninja tests - ../nstools/bin/nstest -j80 --prefix="qemu-arm" -logres.numscale.com {/home/gquintin} +logres.hpc.scale {/home/gquintin} - mkdir cmake-build-cpu - cd cmake-build-cpu - cmake .. -Dsimd=cpu @@ -84,7 +84,7 @@ logres.numscale.com {/home/gquintin} - ninja tests - ../nstools/bin/nstest -j20 -bowden.numscale.com {/home/gquintin} +bowden.hpc.scale {/home/gquintin} - bash scripts/build-tests.sh for rocm with rocm - cd build-rocm-rocm - ../nstools/bin/nstest -j80 @@ -99,7 +99,7 @@ bowden.numscale.com {/home/gquintin} - cd .. - bash tests/FindNSIMD.cmake.sh -caradigan.numscale.com {/home/gquintin} +caradigan.hpc.scale {/home/gquintin} - mkdir cmake-build-aarch64 - cd cmake-build-aarch64 - cmake .. 
-Dsimd=aarch64 @@ -120,7 +120,7 @@ caradigan.numscale.com {/home/gquintin} - ninja tests - ../nstools/bin/nstest -j80 -carahes.numscale.com {/home/gquintin} +carahes.hpc.scale {/home/gquintin} - source /etc/profile.d/profile.sh - module load gcc/10.2.0 - mkdir cmake-build-sve128 diff --git a/scripts/hipcc.sh b/scripts/hipcc.sh new file mode 100755 index 00000000..9b40340c --- /dev/null +++ b/scripts/hipcc.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +/opt/rocm/bin/hipcc -D__HIPCC__ -D__hcc_major__=3 -D__hcc_minor__=10 "$@" diff --git a/scripts/local-ci-rerun.ini b/scripts/local-ci-rerun.ini new file mode 100644 index 00000000..1bcf3081 --- /dev/null +++ b/scripts/local-ci-rerun.ini @@ -0,0 +1,64 @@ +# ----------------------------------------------------------------------------- +# Intel CPU/SIMD + +[sse2,sse42,avx,avx2] + +NSTEST -jNPROC + +[avx512_knl] + +module load sde/8.69.1-2021-07-18 +NSTEST --prefix="sde64 -knl --" -jNPROC + +[avx512_skylake] + +module load sde/8.69.1-2021-07-18 +NSTEST --prefix="sde64 -skx --" -jNPROC + +# ----------------------------------------------------------------------------- +# Arm + +[aarch64] + +module load qemu/6.1.0 +NSTEST --prefix="qemu-aarch64" -jNPROC + +[sve128] + +module load clang/13.0.0 +module load qemu/6.1.0 +NSTEST --prefix="qemu-aarch64 -cpu max,sve-max-vq=1" -jNPROC + +[armel] + +module load qemu/6.1.0 +NSTEST --prefix="qemu-arm" -jNPROC + +[armhf] + +module load qemu/6.1.0 +NSTEST --prefix="qemu-arm" -jNPROC + +# ----------------------------------------------------------------------------- +# PowerPC + +[vmx] + +module load clang/13.0.0 +module load qemu/6.1.0 +NSTEST --prefix="qemu-ppc64le -cpu power8" -jNPROC + +[vsx] + +module load clang/13.0.0 +module load qemu/6.1.0 +NSTEST --prefix="qemu-ppc64le -cpu power8" -jNPROC + +# ----------------------------------------------------------------------------- +# Intel oneAPI + +[oneapi] + +source /opt/intel/oneapi/setvars.sh +NSTEST -jNPROC + diff --git a/scripts/local-ci.ini b/scripts/local-ci.ini new file mode 100644 index 00000000..5a5b0a95 --- /dev/null +++ b/scripts/local-ci.ini @@ -0,0 +1,117 @@ +# ----------------------------------------------------------------------------- +# Intel CPU/SIMD + +[sse2,sse42,avx,avx2] + +NSCONFIG -Dsimd=SIMD_EXT -suite=gcc SRC_DIR +ninja TARGET +NSTEST -jNPROC + +[avx512_knl] + +NSCONFIG -Dsimd=SIMD_EXT -suite=gcc SRC_DIR +ninja TARGET +module load sde/8.69.1-2021-07-18 +NSTEST --prefix="sde64 -knl --" -jNPROC + +[avx512_skylake] + +NSCONFIG -Dsimd=SIMD_EXT -suite=gcc SRC_DIR +ninja TARGET +module load sde/8.69.1-2021-07-18 +NSTEST --prefix="sde64 -skx --" -jNPROC + +# ----------------------------------------------------------------------------- +# Arm + +[aarch64] + +module load clang/13.0.0 +NSCONFIG -Dsimd=SIMD_EXT \ + -comp=cc,clang,SRC_DIR/scripts/aarch64-linux-gnu-clang.sh,13,aarch64 \ + -comp=c++,clang,SRC_DIR/scripts/aarch64-linux-gnu-clang++.sh,13,aarch64 \ + SRC_DIR +ninja TARGET +module load qemu/6.1.0 +NSTEST --prefix="qemu-aarch64" -jNPROC + +[sve128] + +module load aarch64-linux-gnu/11.2.0 +NSCONFIG -Dsimd=SIMD_EXT \ + -comp=cc,gcc,aarch64-linux-gnu-gcc,11,aarch64 \ + -comp=c++,gcc,aarch64-linux-gnu-g++,11,aarch64 SRC_DIR +ninja TARGET +module load qemu/6.1.0 +NSTEST --prefix="qemu-aarch64 -cpu max,sve-max-vq=1" -jNPROC + +[armel] + +NSCONFIG -Dsimd=neon128 -comp=cc,gcc,arm-linux-gnueabi-gcc,6,armel \ + -comp=c++,gcc,arm-linux-gnueabi-g++,6,armel SRC_DIR +ninja TARGET +module load qemu/6.1.0 +NSTEST --prefix="qemu-arm" -jNPROC + +[armhf] + +NSCONFIG 
-Dsimd=neon128 -comp=cc,gcc,arm-linux-gnueabihf-gcc,6,armhf \ + -comp=c++,gcc,arm-linux-gnueabihf-g++,6,armhf SRC_DIR +ninja TARGET +module load qemu/6.1.0 +NSTEST --prefix="qemu-arm" -jNPROC + +# ----------------------------------------------------------------------------- +# PowerPC + +[vmx] + +module load clang/13.0.0 +NSCONFIG -Dsimd=vmx \ + -comp=cc,clang,SRC_DIR/scripts/powerpc64le-linux-gnu-clang.sh,7,ppc64el \ + -comp=c++,clang,SRC_DIR/scripts/powerpc64le-linux-gnu-clang++.sh,7,ppc64el \ + SRC_DIR +ninja TARGET +module load qemu/6.1.0 +NSTEST --prefix="qemu-ppc64le -cpu power8" -jNPROC + +[vsx] + +module load clang/13.0.0 +NSCONFIG -Dsimd=vsx \ + -comp=cc,clang,SRC_DIR/scripts/powerpc64le-linux-gnu-clang.sh,7,ppc64el \ + -comp=c++,clang,SRC_DIR/scripts/powerpc64le-linux-gnu-clang++.sh,7,ppc64el \ + SRC_DIR +ninja TARGET +module load qemu/6.1.0 +NSTEST --prefix="qemu-ppc64le -cpu power8" -jNPROC + +# ----------------------------------------------------------------------------- +# Intel oneAPI + +[oneapi] + +source /opt/intel/oneapi/setvars.sh +NSCONFIG -Dsimd=SIMD_EXT -suite=oneapi SRC_DIR +ninja TARGET +NSTEST -jNPROC + +# ----------------------------------------------------------------------------- +# NVIDIA CUDA (cannot be emulated, or at least I don't know how) + +[cuda] + +NSCONFIG -Dsimd=SIMD_EXT -suite=cuda SRC_DIR +ninja TARGET + +# ----------------------------------------------------------------------------- +# AMD HIP/ROCm (can be emulated with HIP-CPU) but as of now (2021/10/07) the +# library is marked as "Please note the library is being actively developed, +# and is known to be incomplet; it might also be incorrekt and there could be a +# few bad bugs lurking." so that I will wait for a first release. + +[rocm] + +NSCONFIG -Dsimd=SIMD_EXT -suite=rocm SRC_DIR +ninja TARGET + diff --git a/scripts/local-ci.sh b/scripts/local-ci.sh new file mode 100644 index 00000000..2b09d0fb --- /dev/null +++ b/scripts/local-ci.sh @@ -0,0 +1,83 @@ +#!/bin/sh + +# ----------------------------------------------------------------------------- +# Init + +INPUT="`realpath ${1}`" +TARGET="${2}" +cd `dirname $0` +ROOT="${PWD}/../build-local-ci" +mkdir -p "${ROOT}" +NPROC=`nproc` +if [ "${TARGET}" == "" ]; then + TARGET="tests" +fi + +# ----------------------------------------------------------------------------- +# Make sure we have generated nsimd + +python3 "${PWD}/../egg/hatch.py" -ltf + +# ----------------------------------------------------------------------------- +# Make sure we have the latest commit for nsconfig + +NSCONFIG="${PWD}/../nstools/nsconfig/nsconfig" +NSTEST="${PWD}/../nstools/nsconfig/nstest" + +[ -e "${NSCONFIG}" ] || ( export NSTOOLS_CHECKOUT_LAST_COMMIT=1 && \ + bash "${PWD}/../scripts/setup.sh" ) +[ -e "${NSTEST}" ] || ( export NSTOOLS_CHECKOUT_LAST_COMMIT=1 && \ + bash "${PWD}/../scripts/setup.sh" ) + +# ----------------------------------------------------------------------------- +# Parse input file + +SIMD_EXTS="" + +while read -r line; do + + # Empty lines + if [ "`echo ${line} | sed 's/[ \t]*//g'`" == "" ]; then + continue + fi + + # Comments + if [ "`echo ${line} | cut -c 1`" == "#" ]; then + continue + fi + + # New architectures + if [ "`echo ${line} | cut -c 1`" == "[" ]; then + SIMD_EXTS="`echo ${line} | sed -e 's/[][,]/ /g'`" + for s in ${SIMD_EXTS}; do + echo '#!/bin/bash' >"${ROOT}/run-${s}.sh" + echo >>"${ROOT}/run-${s}.sh" + echo 'cd `dirname $0`' >>"${ROOT}/run-${s}.sh" + echo "mkdir -p ${s}" >>"${ROOT}/run-${s}.sh" + echo "cd ${s}" 
>>"${ROOT}/run-${s}.sh" + echo >>"${ROOT}/run-${s}.sh" + done + continue + fi + + # Standard line (part of a script) + if [ "${SIMD_EXTS}" != "" ]; then + for s in ${SIMD_EXTS}; do + echo ${line} | sed -e "s,SIMD_EXT,${s},g" \ + -e "s,SRC_DIR,${PWD}/..,g" \ + -e "s,NSCONFIG,${NSCONFIG},g" \ + -e "s,NSTEST,${NSTEST},g" \ + -e "s,NPROC,${NPROC},g" \ + -e "s,TARGET,${TARGET},g" \ + >>"${ROOT}/run-${s}.sh" + done + fi + +done <"${INPUT}" + +# ----------------------------------------------------------------------------- +# Compile all tests + +for i in ${ROOT}/*.sh; do + ( bash ${i} || true ) | tee ${i}.log +done diff --git a/scripts/powerpc64le-linux-gnu-clang++.sh b/scripts/powerpc64le-linux-gnu-clang++.sh new file mode 100755 index 00000000..b829eaa5 --- /dev/null +++ b/scripts/powerpc64le-linux-gnu-clang++.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +clang++ --target=powerpc64le-linux-gnu \ + -I/usr/powerpc64le-linux-gnu/include/c++/8/powerpc64le-linux-gnu "$@" diff --git a/scripts/powerpc64le-linux-gnu-clang.sh b/scripts/powerpc64le-linux-gnu-clang.sh new file mode 100755 index 00000000..3c452723 --- /dev/null +++ b/scripts/powerpc64le-linux-gnu-clang.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +clang --target=powerpc64le-linux-gnu \ + -I/usr/powerpc64le-linux-gnu/include/c++/8/powerpc64le-linux-gnu "$@" diff --git a/scripts/setup.sh b/scripts/setup.sh index 4464e51c..2c737ca1 100644 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -1,5 +1,5 @@ -#!/bin/sh -# Copyright (c) 2019 Agenium Scale +#!/bin/bash +# Copyright (c) 2021 Agenium Scale # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -33,8 +33,7 @@ NSTOOLS_DIR="${PWD}/../nstools" ############################################################################### # Build nsconfig (if not already built) -[ -e "${NSTOOLS_DIR}/README.md" ] && \ - ( cd "${NSTOOLS_DIR}" && ( git pull || true ) ) || \ +[ -d "${NSTOOLS_DIR}" ] || \ ( cd "${PWD}/.." && \ ( [ -d .git ] \ && ( git clone `git remote get-url origin | sed s/nsimd/nstools/g` ) \ @@ -44,12 +43,9 @@ if [ "${NSTOOLS_CHECKOUT_LAST_COMMIT}" == "" ]; then git -C "${NSTOOLS_DIR}" checkout v2.2 else git -C "${NSTOOLS_DIR}" checkout master + git -C "${NSTOOLS_DIR}" pull fi -[ -e "${NSTOOLS_DIR}/bin" ] || ( mkdir -p "${NSTOOLS_DIR}/bin" ) - ( cd "${NSTOOLS_DIR}/nsconfig" && \ - make -j8 -f Makefile.nix nsconfig && \ - make -j8 -f Makefile.nix nstest && \ - cp "nsconfig" "${NSTOOLS_DIR}/bin" && \ - cp "nstest" "${NSTOOLS_DIR}/bin" ) + make -B -j8 -f Makefile.nix nsconfig && \ + make -B -j8 -f Makefile.nix nstest ) diff --git a/src/dd.h b/src/dd.h new file mode 100644 index 00000000..20816a5e --- /dev/null +++ b/src/dd.h @@ -0,0 +1,299 @@ +// Copyright Naoki Shibata and contributors 2010 - 2020. +// Distributed under the Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +typedef struct { + vdouble x, y; +} vdouble2; + +static vdouble vd2getx_vd_vd2(vdouble2 v) { return v.x; } +static vdouble vd2gety_vd_vd2(vdouble2 v) { return v.y; } +static vdouble2 vd2setxy_vd2_vd_vd(vdouble x, vdouble y) { vdouble2 v; v.x = x; v.y = y; return v; } +static vdouble2 vd2setx_vd2_vd2_vd(vdouble2 v, vdouble d) { v.x = d; return v; } +static vdouble2 vd2sety_vd2_vd2_vd(vdouble2 v, vdouble d) { v.y = d; return v; } +#endif + +static INLINE CONST VECTOR_CC vdouble vupper_vd_vd(vdouble d) { + return vreinterpret_vd_vm(vand_vm_vm_vm(vreinterpret_vm_vd(d), vcast_vm_i_i(0xffffffff, 0xf8000000))); +} + +static INLINE CONST VECTOR_CC vdouble2 vcast_vd2_vd_vd(vdouble h, vdouble l) { + return vd2setxy_vd2_vd_vd(h, l); +} + +static INLINE CONST VECTOR_CC vdouble2 vcast_vd2_d_d(double h, double l) { + return vd2setxy_vd2_vd_vd(vcast_vd_d(h), vcast_vd_d(l)); +} + +static INLINE CONST VECTOR_CC vdouble2 vsel_vd2_vo_vd2_vd2(vopmask m, vdouble2 x, vdouble2 y) { + return vd2setxy_vd2_vd_vd(vsel_vd_vo_vd_vd(m, vd2getx_vd_vd2(x), vd2getx_vd_vd2(y)), + vsel_vd_vo_vd_vd(m, vd2gety_vd_vd2(x), vd2gety_vd_vd2(y))); +} + +static INLINE CONST VECTOR_CC vdouble2 vsel_vd2_vo_d_d_d_d(vopmask o, double x1, double y1, double x0, double y0) { + return vd2setxy_vd2_vd_vd(vsel_vd_vo_d_d(o, x1, x0), + vsel_vd_vo_d_d(o, y1, y0)); +} + +static INLINE CONST VECTOR_CC vdouble vadd_vd_3vd(vdouble v0, vdouble v1, vdouble v2) { + return vadd_vd_vd_vd(vadd_vd_vd_vd(v0, v1), v2); +} + +static INLINE CONST VECTOR_CC vdouble vadd_vd_4vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3) { + return vadd_vd_3vd(vadd_vd_vd_vd(v0, v1), v2, v3); +} + +static INLINE CONST VECTOR_CC vdouble vadd_vd_5vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4) { + return vadd_vd_4vd(vadd_vd_vd_vd(v0, v1), v2, v3, v4); +} + +static INLINE CONST VECTOR_CC vdouble vadd_vd_6vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4, vdouble v5) { + return vadd_vd_5vd(vadd_vd_vd_vd(v0, v1), v2, v3, v4, v5); +} + +static INLINE CONST VECTOR_CC vdouble vadd_vd_7vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4, vdouble v5, vdouble v6) { + return vadd_vd_6vd(vadd_vd_vd_vd(v0, v1), v2, v3, v4, v5, v6); +} + +static INLINE CONST VECTOR_CC vdouble vsub_vd_3vd(vdouble v0, vdouble v1, vdouble v2) { + return vsub_vd_vd_vd(vsub_vd_vd_vd(v0, v1), v2); +} + +static INLINE CONST VECTOR_CC vdouble vsub_vd_4vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3) { + return vsub_vd_3vd(vsub_vd_vd_vd(v0, v1), v2, v3); +} + +static INLINE CONST VECTOR_CC vdouble vsub_vd_5vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4) { + return vsub_vd_4vd(vsub_vd_vd_vd(v0, v1), v2, v3, v4); +} + +static INLINE CONST VECTOR_CC vdouble vsub_vd_6vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4, vdouble v5) { + return vsub_vd_5vd(vsub_vd_vd_vd(v0, v1), v2, v3, v4, v5); +} + +// + +static INLINE CONST VECTOR_CC vdouble2 ddneg_vd2_vd2(vdouble2 x) { + return vcast_vd2_vd_vd(vneg_vd_vd(vd2getx_vd_vd2(x)), vneg_vd_vd(vd2gety_vd_vd2(x))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddabs_vd2_vd2(vdouble2 x) { + return vcast_vd2_vd_vd(vabs_vd_vd(vd2getx_vd_vd2(x)), + vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(vd2gety_vd_vd2(x)), + vand_vm_vm_vm(vreinterpret_vm_vd(vd2getx_vd_vd2(x)), + vreinterpret_vm_vd(vcast_vd_d(-0.0)))))); +} + +static INLINE CONST VECTOR_CC vdouble2 
ddnormalize_vd2_vd2(vdouble2 t) { + vdouble s = vadd_vd_vd_vd(vd2getx_vd_vd2(t), vd2gety_vd_vd2(t)); + return vd2setxy_vd2_vd_vd(s, vadd_vd_vd_vd(vsub_vd_vd_vd(vd2getx_vd_vd2(t), s), vd2gety_vd_vd2(t))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddscale_vd2_vd2_vd(vdouble2 d, vdouble s) { + return vd2setxy_vd2_vd_vd(vmul_vd_vd_vd(vd2getx_vd_vd2(d), s), vmul_vd_vd_vd(vd2gety_vd_vd2(d), s)); +} + +static INLINE CONST VECTOR_CC vdouble2 ddadd_vd2_vd_vd(vdouble x, vdouble y) { + vdouble s = vadd_vd_vd_vd(x, y); + return vd2setxy_vd2_vd_vd(s, vadd_vd_vd_vd(vsub_vd_vd_vd(x, s), y)); +} + +static INLINE CONST VECTOR_CC vdouble2 ddadd2_vd2_vd_vd(vdouble x, vdouble y) { + vdouble s = vadd_vd_vd_vd(x, y); + vdouble v = vsub_vd_vd_vd(s, x); + return vd2setxy_vd2_vd_vd(s, vadd_vd_vd_vd(vsub_vd_vd_vd(x, vsub_vd_vd_vd(s, v)), vsub_vd_vd_vd(y, v))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddadd_vd2_vd2_vd(vdouble2 x, vdouble y) { + vdouble s = vadd_vd_vd_vd(vd2getx_vd_vd2(x), y); + return vd2setxy_vd2_vd_vd(s, vadd_vd_3vd(vsub_vd_vd_vd(vd2getx_vd_vd2(x), s), y, vd2gety_vd_vd2(x))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddsub_vd2_vd2_vd(vdouble2 x, vdouble y) { + vdouble s = vsub_vd_vd_vd(vd2getx_vd_vd2(x), y); + return vd2setxy_vd2_vd_vd(s, vadd_vd_vd_vd(vsub_vd_vd_vd(vsub_vd_vd_vd(vd2getx_vd_vd2(x), s), y), vd2gety_vd_vd2(x))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddadd2_vd2_vd2_vd(vdouble2 x, vdouble y) { + vdouble s = vadd_vd_vd_vd(vd2getx_vd_vd2(x), y); + vdouble v = vsub_vd_vd_vd(s, vd2getx_vd_vd2(x)); + vdouble w = vadd_vd_vd_vd(vsub_vd_vd_vd(vd2getx_vd_vd2(x), vsub_vd_vd_vd(s, v)), vsub_vd_vd_vd(y, v)); + return vd2setxy_vd2_vd_vd(s, vadd_vd_vd_vd(w, vd2gety_vd_vd2(x))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddadd_vd2_vd_vd2(vdouble x, vdouble2 y) { + vdouble s = vadd_vd_vd_vd(x, vd2getx_vd_vd2(y)); + return vd2setxy_vd2_vd_vd(s, vadd_vd_3vd(vsub_vd_vd_vd(x, s), vd2getx_vd_vd2(y), vd2gety_vd_vd2(y))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddadd2_vd2_vd_vd2(vdouble x, vdouble2 y) { + vdouble s = vadd_vd_vd_vd(x, vd2getx_vd_vd2(y)); + vdouble v = vsub_vd_vd_vd(s, x); + return vd2setxy_vd2_vd_vd(s, vadd_vd_vd_vd(vadd_vd_vd_vd(vsub_vd_vd_vd(x, vsub_vd_vd_vd(s, v)), + vsub_vd_vd_vd(vd2getx_vd_vd2(y), v)), vd2gety_vd_vd2(y))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddadd_vd2_vd2_vd2(vdouble2 x, vdouble2 y) { + // |x| >= |y| + + vdouble s = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(y)); + return vd2setxy_vd2_vd_vd(s, vadd_vd_4vd(vsub_vd_vd_vd(vd2getx_vd_vd2(x), s), vd2getx_vd_vd2(y), vd2gety_vd_vd2(x), vd2gety_vd_vd2(y))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddadd2_vd2_vd2_vd2(vdouble2 x, vdouble2 y) { + vdouble s = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(y)); + vdouble v = vsub_vd_vd_vd(s, vd2getx_vd_vd2(x)); + vdouble t = vadd_vd_vd_vd(vsub_vd_vd_vd(vd2getx_vd_vd2(x), vsub_vd_vd_vd(s, v)), vsub_vd_vd_vd(vd2getx_vd_vd2(y), v)); + return vd2setxy_vd2_vd_vd(s, vadd_vd_vd_vd(t, vadd_vd_vd_vd(vd2gety_vd_vd2(x), vd2gety_vd_vd2(y)))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddsub_vd2_vd_vd(vdouble x, vdouble y) { + // |x| >= |y| + + vdouble s = vsub_vd_vd_vd(x, y); + return vd2setxy_vd2_vd_vd(s, vsub_vd_vd_vd(vsub_vd_vd_vd(x, s), y)); +} + +static INLINE CONST VECTOR_CC vdouble2 ddsub_vd2_vd2_vd2(vdouble2 x, vdouble2 y) { + // |x| >= |y| + + vdouble s = vsub_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(y)); + vdouble t = vsub_vd_vd_vd(vd2getx_vd_vd2(x), s); + t = vsub_vd_vd_vd(t, vd2getx_vd_vd2(y)); + t = vadd_vd_vd_vd(t, vd2gety_vd_vd2(x)); 
+ return vd2setxy_vd2_vd_vd(s, vsub_vd_vd_vd(t, vd2gety_vd_vd2(y))); +} + +#ifdef ENABLE_FMA_DP +static INLINE CONST VECTOR_CC vdouble2 dddiv_vd2_vd2_vd2(vdouble2 n, vdouble2 d) { + vdouble t = vrec_vd_vd(vd2getx_vd_vd2(d)); + vdouble s = vmul_vd_vd_vd(vd2getx_vd_vd2(n), t); + vdouble u = vfmapn_vd_vd_vd_vd(t, vd2getx_vd_vd2(n), s); + vdouble v = vfmanp_vd_vd_vd_vd(vd2gety_vd_vd2(d), t, vfmanp_vd_vd_vd_vd(vd2getx_vd_vd2(d), t, vcast_vd_d(1))); + return vd2setxy_vd2_vd_vd(s, vfma_vd_vd_vd_vd(s, v, vfma_vd_vd_vd_vd(vd2gety_vd_vd2(n), t, u))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddmul_vd2_vd_vd(vdouble x, vdouble y) { + vdouble s = vmul_vd_vd_vd(x, y); + return vd2setxy_vd2_vd_vd(s, vfmapn_vd_vd_vd_vd(x, y, s)); +} + +static INLINE CONST VECTOR_CC vdouble2 ddsqu_vd2_vd2(vdouble2 x) { + vdouble s = vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(x)); + return vd2setxy_vd2_vd_vd(s, vfma_vd_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(x)), vd2gety_vd_vd2(x), vfmapn_vd_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(x), s))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddmul_vd2_vd2_vd2(vdouble2 x, vdouble2 y) { + vdouble s = vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(y)); + return vd2setxy_vd2_vd_vd(s, vfma_vd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(y), vfma_vd_vd_vd_vd(vd2gety_vd_vd2(x), vd2getx_vd_vd2(y), vfmapn_vd_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(y), s)))); +} + +static INLINE CONST VECTOR_CC vdouble ddmul_vd_vd2_vd2(vdouble2 x, vdouble2 y) { + return vfma_vd_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(y), vfma_vd_vd_vd_vd(vd2gety_vd_vd2(x), vd2getx_vd_vd2(y), vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(y)))); +} + +static INLINE CONST VECTOR_CC vdouble ddsqu_vd_vd2(vdouble2 x) { + return vfma_vd_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(x), vadd_vd_vd_vd(vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)), vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddmul_vd2_vd2_vd(vdouble2 x, vdouble y) { + vdouble s = vmul_vd_vd_vd(vd2getx_vd_vd2(x), y); + return vd2setxy_vd2_vd_vd(s, vfma_vd_vd_vd_vd(vd2gety_vd_vd2(x), y, vfmapn_vd_vd_vd_vd(vd2getx_vd_vd2(x), y, s))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddrec_vd2_vd(vdouble d) { + vdouble s = vrec_vd_vd(d); + return vd2setxy_vd2_vd_vd(s, vmul_vd_vd_vd(s, vfmanp_vd_vd_vd_vd(d, s, vcast_vd_d(1)))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddrec_vd2_vd2(vdouble2 d) { + vdouble s = vrec_vd_vd(vd2getx_vd_vd2(d)); + return vd2setxy_vd2_vd_vd(s, vmul_vd_vd_vd(s, vfmanp_vd_vd_vd_vd(vd2gety_vd_vd2(d), s, vfmanp_vd_vd_vd_vd(vd2getx_vd_vd2(d), s, vcast_vd_d(1))))); +} +#else +static INLINE CONST VECTOR_CC vdouble2 dddiv_vd2_vd2_vd2(vdouble2 n, vdouble2 d) { + vdouble t = vrec_vd_vd(vd2getx_vd_vd2(d)); + vdouble dh = vupper_vd_vd(vd2getx_vd_vd2(d)), dl = vsub_vd_vd_vd(vd2getx_vd_vd2(d), dh); + vdouble th = vupper_vd_vd(t ), tl = vsub_vd_vd_vd(t , th); + vdouble nhh = vupper_vd_vd(vd2getx_vd_vd2(n)), nhl = vsub_vd_vd_vd(vd2getx_vd_vd2(n), nhh); + + vdouble s = vmul_vd_vd_vd(vd2getx_vd_vd2(n), t); + + vdouble u = vadd_vd_5vd(vsub_vd_vd_vd(vmul_vd_vd_vd(nhh, th), s), vmul_vd_vd_vd(nhh, tl), vmul_vd_vd_vd(nhl, th), vmul_vd_vd_vd(nhl, tl), + vmul_vd_vd_vd(s, vsub_vd_5vd(vcast_vd_d(1), vmul_vd_vd_vd(dh, th), vmul_vd_vd_vd(dh, tl), vmul_vd_vd_vd(dl, th), vmul_vd_vd_vd(dl, tl)))); + + return vd2setxy_vd2_vd_vd(s, vmla_vd_vd_vd_vd(t, vsub_vd_vd_vd(vd2gety_vd_vd2(n), vmul_vd_vd_vd(s, vd2gety_vd_vd2(d))), u)); +} + +static INLINE CONST VECTOR_CC vdouble2 
ddmul_vd2_vd_vd(vdouble x, vdouble y) { + vdouble xh = vupper_vd_vd(x), xl = vsub_vd_vd_vd(x, xh); + vdouble yh = vupper_vd_vd(y), yl = vsub_vd_vd_vd(y, yh); + + vdouble s = vmul_vd_vd_vd(x, y); + return vd2setxy_vd2_vd_vd(s, vadd_vd_5vd(vmul_vd_vd_vd(xh, yh), vneg_vd_vd(s), vmul_vd_vd_vd(xl, yh), vmul_vd_vd_vd(xh, yl), vmul_vd_vd_vd(xl, yl))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddmul_vd2_vd2_vd(vdouble2 x, vdouble y) { + vdouble xh = vupper_vd_vd(vd2getx_vd_vd2(x)), xl = vsub_vd_vd_vd(vd2getx_vd_vd2(x), xh); + vdouble yh = vupper_vd_vd(y ), yl = vsub_vd_vd_vd(y , yh); + + vdouble s = vmul_vd_vd_vd(vd2getx_vd_vd2(x), y); + return vd2setxy_vd2_vd_vd(s, vadd_vd_6vd(vmul_vd_vd_vd(xh, yh), vneg_vd_vd(s), vmul_vd_vd_vd(xl, yh), vmul_vd_vd_vd(xh, yl), vmul_vd_vd_vd(xl, yl), vmul_vd_vd_vd(vd2gety_vd_vd2(x), y))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddmul_vd2_vd2_vd2(vdouble2 x, vdouble2 y) { + vdouble xh = vupper_vd_vd(vd2getx_vd_vd2(x)), xl = vsub_vd_vd_vd(vd2getx_vd_vd2(x), xh); + vdouble yh = vupper_vd_vd(vd2getx_vd_vd2(y)), yl = vsub_vd_vd_vd(vd2getx_vd_vd2(y), yh); + + vdouble s = vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(y)); + return vd2setxy_vd2_vd_vd(s, vadd_vd_7vd(vmul_vd_vd_vd(xh, yh), vneg_vd_vd(s), vmul_vd_vd_vd(xl, yh), vmul_vd_vd_vd(xh, yl), vmul_vd_vd_vd(xl, yl), vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(y)), vmul_vd_vd_vd(vd2gety_vd_vd2(x), vd2getx_vd_vd2(y)))); +} + +static INLINE CONST VECTOR_CC vdouble ddmul_vd_vd2_vd2(vdouble2 x, vdouble2 y) { + vdouble xh = vupper_vd_vd(vd2getx_vd_vd2(x)), xl = vsub_vd_vd_vd(vd2getx_vd_vd2(x), xh); + vdouble yh = vupper_vd_vd(vd2getx_vd_vd2(y)), yl = vsub_vd_vd_vd(vd2getx_vd_vd2(y), yh); + + return vadd_vd_6vd(vmul_vd_vd_vd(vd2gety_vd_vd2(x), yh), vmul_vd_vd_vd(xh, vd2gety_vd_vd2(y)), vmul_vd_vd_vd(xl, yl), vmul_vd_vd_vd(xh, yl), vmul_vd_vd_vd(xl, yh), vmul_vd_vd_vd(xh, yh)); +} + +static INLINE CONST VECTOR_CC vdouble2 ddsqu_vd2_vd2(vdouble2 x) { + vdouble xh = vupper_vd_vd(vd2getx_vd_vd2(x)), xl = vsub_vd_vd_vd(vd2getx_vd_vd2(x), xh); + + vdouble s = vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(x)); + return vd2setxy_vd2_vd_vd(s, vadd_vd_5vd(vmul_vd_vd_vd(xh, xh), vneg_vd_vd(s), vmul_vd_vd_vd(vadd_vd_vd_vd(xh, xh), xl), vmul_vd_vd_vd(xl, xl), vmul_vd_vd_vd(vd2getx_vd_vd2(x), vadd_vd_vd_vd(vd2gety_vd_vd2(x), vd2gety_vd_vd2(x))))); +} + +static INLINE CONST VECTOR_CC vdouble ddsqu_vd_vd2(vdouble2 x) { + vdouble xh = vupper_vd_vd(vd2getx_vd_vd2(x)), xl = vsub_vd_vd_vd(vd2getx_vd_vd2(x), xh); + + return vadd_vd_5vd(vmul_vd_vd_vd(xh, vd2gety_vd_vd2(x)), vmul_vd_vd_vd(xh, vd2gety_vd_vd2(x)), vmul_vd_vd_vd(xl, xl), vadd_vd_vd_vd(vmul_vd_vd_vd(xh, xl), vmul_vd_vd_vd(xh, xl)), vmul_vd_vd_vd(xh, xh)); +} + +static INLINE CONST VECTOR_CC vdouble2 ddrec_vd2_vd(vdouble d) { + vdouble t = vrec_vd_vd(d); + vdouble dh = vupper_vd_vd(d), dl = vsub_vd_vd_vd(d, dh); + vdouble th = vupper_vd_vd(t), tl = vsub_vd_vd_vd(t, th); + + return vd2setxy_vd2_vd_vd(t, vmul_vd_vd_vd(t, vsub_vd_5vd(vcast_vd_d(1), vmul_vd_vd_vd(dh, th), vmul_vd_vd_vd(dh, tl), vmul_vd_vd_vd(dl, th), vmul_vd_vd_vd(dl, tl)))); +} + +static INLINE CONST VECTOR_CC vdouble2 ddrec_vd2_vd2(vdouble2 d) { + vdouble t = vrec_vd_vd(vd2getx_vd_vd2(d)); + vdouble dh = vupper_vd_vd(vd2getx_vd_vd2(d)), dl = vsub_vd_vd_vd(vd2getx_vd_vd2(d), dh); + vdouble th = vupper_vd_vd(t ), tl = vsub_vd_vd_vd(t , th); + + return vd2setxy_vd2_vd_vd(t, vmul_vd_vd_vd(t, vsub_vd_6vd(vcast_vd_d(1), vmul_vd_vd_vd(dh, th), vmul_vd_vd_vd(dh, tl), vmul_vd_vd_vd(dl, th), vmul_vd_vd_vd(dl, tl), 
vmul_vd_vd_vd(vd2gety_vd_vd2(d), t)))); +} +#endif + +static INLINE CONST VECTOR_CC vdouble2 ddsqrt_vd2_vd2(vdouble2 d) { + vdouble t = vsqrt_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d))); + return ddscale_vd2_vd2_vd(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd2(d, ddmul_vd2_vd_vd(t, t)), ddrec_vd2_vd(t)), vcast_vd_d(0.5)); +} + +static INLINE CONST VECTOR_CC vdouble2 ddsqrt_vd2_vd(vdouble d) { + vdouble t = vsqrt_vd_vd(d); + return ddscale_vd2_vd2_vd(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd_vd2(d, ddmul_vd2_vd_vd(t, t)), ddrec_vd2_vd(t)), vcast_vd_d(0.5)); +} diff --git a/src/df.h b/src/df.h new file mode 100644 index 00000000..bbe7f263 --- /dev/null +++ b/src/df.h @@ -0,0 +1,365 @@ +// Copyright Naoki Shibata and contributors 2010 - 2020. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +typedef struct { + vfloat x, y; +} vfloat2; + +static vfloat vf2getx_vf_vf2(vfloat2 v) { return v.x; } +static vfloat vf2gety_vf_vf2(vfloat2 v) { return v.y; } +static vfloat2 vf2setxy_vf2_vf_vf(vfloat x, vfloat y) { vfloat2 v; v.x = x; v.y = y; return v; } +static vfloat2 vf2setx_vf2_vf2_vf(vfloat2 v, vfloat d) { v.x = d; return v; } +static vfloat2 vf2sety_vf2_vf2_vf(vfloat2 v, vfloat d) { v.y = d; return v; } +#endif + +static INLINE CONST VECTOR_CC vfloat vupper_vf_vf(vfloat d) { + return vreinterpret_vf_vi2(vand_vi2_vi2_vi2(vreinterpret_vi2_vf(d), vcast_vi2_i(0xfffff000))); +} + +static INLINE CONST VECTOR_CC vfloat2 vcast_vf2_vf_vf(vfloat h, vfloat l) { + return vf2setxy_vf2_vf_vf(h, l); +} + +static INLINE CONST VECTOR_CC vfloat2 vcast_vf2_f_f(float h, float l) { + return vf2setxy_vf2_vf_vf(vcast_vf_f(h), vcast_vf_f(l)); +} + +static INLINE CONST VECTOR_CC vfloat2 vcast_vf2_d(double d) { + return vf2setxy_vf2_vf_vf(vcast_vf_f(d), vcast_vf_f(d - (float)d)); +} + +static INLINE CONST VECTOR_CC vfloat2 vsel_vf2_vo_vf2_vf2(vopmask m, vfloat2 x, vfloat2 y) { + return vf2setxy_vf2_vf_vf(vsel_vf_vo_vf_vf(m, vf2getx_vf_vf2(x), vf2getx_vf_vf2(y)), vsel_vf_vo_vf_vf(m, vf2gety_vf_vf2(x), vf2gety_vf_vf2(y))); +} + +static INLINE CONST VECTOR_CC vfloat2 vsel_vf2_vo_f_f_f_f(vopmask o, float x1, float y1, float x0, float y0) { + return vf2setxy_vf2_vf_vf(vsel_vf_vo_f_f(o, x1, x0), vsel_vf_vo_f_f(o, y1, y0)); +} + +static INLINE CONST VECTOR_CC vfloat2 vsel_vf2_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { + return vsel_vf2_vo_vf2_vf2(o0, vcast_vf2_d(d0), vsel_vf2_vo_vf2_vf2(o1, vcast_vf2_d(d1), vcast_vf2_d(d2))); +} + +static INLINE CONST VECTOR_CC vfloat2 vsel_vf2_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { + return vsel_vf2_vo_vf2_vf2(o0, vcast_vf2_d(d0), vsel_vf2_vo_vf2_vf2(o1, vcast_vf2_d(d1), vsel_vf2_vo_vf2_vf2(o2, vcast_vf2_d(d2), vcast_vf2_d(d3)))); +} + +static INLINE CONST VECTOR_CC vfloat2 vabs_vf2_vf2(vfloat2 x) { + return vcast_vf2_vf_vf(vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0)), vreinterpret_vm_vf(vf2getx_vf_vf2(x))), vreinterpret_vm_vf(vf2getx_vf_vf2(x)))), + vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0)), vreinterpret_vm_vf(vf2getx_vf_vf2(x))), vreinterpret_vm_vf(vf2gety_vf_vf2(x))))); +} + +static INLINE CONST VECTOR_CC vfloat vadd_vf_3vf(vfloat v0, vfloat v1, vfloat v2) { + return vadd_vf_vf_vf(vadd_vf_vf_vf(v0, v1), v2); +} + +static INLINE CONST VECTOR_CC vfloat vadd_vf_4vf(vfloat 
v0, vfloat v1, vfloat v2, vfloat v3) { + return vadd_vf_3vf(vadd_vf_vf_vf(v0, v1), v2, v3); +} + +static INLINE CONST VECTOR_CC vfloat vadd_vf_5vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3, vfloat v4) { + return vadd_vf_4vf(vadd_vf_vf_vf(v0, v1), v2, v3, v4); +} + +static INLINE CONST VECTOR_CC vfloat vadd_vf_6vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3, vfloat v4, vfloat v5) { + return vadd_vf_5vf(vadd_vf_vf_vf(v0, v1), v2, v3, v4, v5); +} + +static INLINE CONST VECTOR_CC vfloat vadd_vf_7vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3, vfloat v4, vfloat v5, vfloat v6) { + return vadd_vf_6vf(vadd_vf_vf_vf(v0, v1), v2, v3, v4, v5, v6); +} + +static INLINE CONST VECTOR_CC vfloat vsub_vf_3vf(vfloat v0, vfloat v1, vfloat v2) { + return vsub_vf_vf_vf(vsub_vf_vf_vf(v0, v1), v2); +} + +static INLINE CONST VECTOR_CC vfloat vsub_vf_4vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3) { + return vsub_vf_3vf(vsub_vf_vf_vf(v0, v1), v2, v3); +} + +static INLINE CONST VECTOR_CC vfloat vsub_vf_5vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3, vfloat v4) { + return vsub_vf_4vf(vsub_vf_vf_vf(v0, v1), v2, v3, v4); +} + +// + +static INLINE CONST VECTOR_CC vfloat2 dfneg_vf2_vf2(vfloat2 x) { + return vcast_vf2_vf_vf(vneg_vf_vf(vf2getx_vf_vf2(x)), vneg_vf_vf(vf2gety_vf_vf2(x))); +} + +static INLINE CONST VECTOR_CC vfloat2 dfabs_vf2_vf2(vfloat2 x) { + return vcast_vf2_vf_vf(vabs_vf_vf(vf2getx_vf_vf2(x)), + vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vf2gety_vf_vf2(x)), vand_vm_vm_vm(vreinterpret_vm_vf(vf2getx_vf_vf2(x)), vreinterpret_vm_vf(vcast_vf_f(-0.0f)))))); +} + +static INLINE CONST VECTOR_CC vfloat2 dfnormalize_vf2_vf2(vfloat2 t) { + vfloat s = vadd_vf_vf_vf(vf2getx_vf_vf2(t), vf2gety_vf_vf2(t)); + return vf2setxy_vf2_vf_vf(s, vadd_vf_vf_vf(vsub_vf_vf_vf(vf2getx_vf_vf2(t), s), vf2gety_vf_vf2(t))); +} + +static INLINE CONST VECTOR_CC vfloat2 dfscale_vf2_vf2_vf(vfloat2 d, vfloat s) { + return vf2setxy_vf2_vf_vf(vmul_vf_vf_vf(vf2getx_vf_vf2(d), s), vmul_vf_vf_vf(vf2gety_vf_vf2(d), s)); +} + +static INLINE CONST VECTOR_CC vfloat2 dfadd_vf2_vf_vf(vfloat x, vfloat y) { + vfloat s = vadd_vf_vf_vf(x, y); + return vf2setxy_vf2_vf_vf(s, vadd_vf_vf_vf(vsub_vf_vf_vf(x, s), y)); +} + +static INLINE CONST VECTOR_CC vfloat2 dfadd2_vf2_vf_vf(vfloat x, vfloat y) { + vfloat s = vadd_vf_vf_vf(x, y); + vfloat v = vsub_vf_vf_vf(s, x); + return vf2setxy_vf2_vf_vf(s, vadd_vf_vf_vf(vsub_vf_vf_vf(x, vsub_vf_vf_vf(s, v)), vsub_vf_vf_vf(y, v))); +} + +static INLINE CONST VECTOR_CC vfloat2 dfadd2_vf2_vf_vf2(vfloat x, vfloat2 y) { + vfloat s = vadd_vf_vf_vf(x, vf2getx_vf_vf2(y)); + vfloat v = vsub_vf_vf_vf(s, x); + return vf2setxy_vf2_vf_vf(s, vadd_vf_vf_vf(vadd_vf_vf_vf(vsub_vf_vf_vf(x, vsub_vf_vf_vf(s, v)), vsub_vf_vf_vf(vf2getx_vf_vf2(y), v)), vf2gety_vf_vf2(y))); + +} + +static INLINE CONST VECTOR_CC vfloat2 dfadd_vf2_vf2_vf(vfloat2 x, vfloat y) { + vfloat s = vadd_vf_vf_vf(vf2getx_vf_vf2(x), y); + return vf2setxy_vf2_vf_vf(s, vadd_vf_3vf(vsub_vf_vf_vf(vf2getx_vf_vf2(x), s), y, vf2gety_vf_vf2(x))); +} + +static INLINE CONST VECTOR_CC vfloat2 dfsub_vf2_vf2_vf(vfloat2 x, vfloat y) { + vfloat s = vsub_vf_vf_vf(vf2getx_vf_vf2(x), y); + return vf2setxy_vf2_vf_vf(s, vadd_vf_vf_vf(vsub_vf_vf_vf(vsub_vf_vf_vf(vf2getx_vf_vf2(x), s), y), vf2gety_vf_vf2(x))); +} + +static INLINE CONST VECTOR_CC vfloat2 dfadd2_vf2_vf2_vf(vfloat2 x, vfloat y) { + vfloat s = vadd_vf_vf_vf(vf2getx_vf_vf2(x), y); + vfloat v = vsub_vf_vf_vf(s, vf2getx_vf_vf2(x)); + vfloat t = vadd_vf_vf_vf(vsub_vf_vf_vf(vf2getx_vf_vf2(x), vsub_vf_vf_vf(s, v)), vsub_vf_vf_vf(y, 
v)); + return vf2setxy_vf2_vf_vf(s, vadd_vf_vf_vf(t, vf2gety_vf_vf2(x))); +} + +static INLINE CONST VECTOR_CC vfloat2 dfadd_vf2_vf_vf2(vfloat x, vfloat2 y) { + vfloat s = vadd_vf_vf_vf(x, vf2getx_vf_vf2(y)); + return vf2setxy_vf2_vf_vf(s, vadd_vf_3vf(vsub_vf_vf_vf(x, s), vf2getx_vf_vf2(y), vf2gety_vf_vf2(y))); +} + +static INLINE CONST VECTOR_CC vfloat2 dfadd_vf2_vf2_vf2(vfloat2 x, vfloat2 y) { + // |x| >= |y| + + vfloat s = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y)); + return vf2setxy_vf2_vf_vf(s, vadd_vf_4vf(vsub_vf_vf_vf(vf2getx_vf_vf2(x), s), vf2getx_vf_vf2(y), vf2gety_vf_vf2(x), vf2gety_vf_vf2(y))); +} + +static INLINE CONST VECTOR_CC vfloat2 dfadd2_vf2_vf2_vf2(vfloat2 x, vfloat2 y) { + vfloat s = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y)); + vfloat v = vsub_vf_vf_vf(s, vf2getx_vf_vf2(x)); + vfloat t = vadd_vf_vf_vf(vsub_vf_vf_vf(vf2getx_vf_vf2(x), vsub_vf_vf_vf(s, v)), vsub_vf_vf_vf(vf2getx_vf_vf2(y), v)); + return vf2setxy_vf2_vf_vf(s, vadd_vf_vf_vf(t, vadd_vf_vf_vf(vf2gety_vf_vf2(x), vf2gety_vf_vf2(y)))); +} + +static INLINE CONST VECTOR_CC vfloat2 dfsub_vf2_vf_vf(vfloat x, vfloat y) { + // |x| >= |y| + + vfloat s = vsub_vf_vf_vf(x, y); + return vf2setxy_vf2_vf_vf(s, vsub_vf_vf_vf(vsub_vf_vf_vf(x, s), y)); +} + +static INLINE CONST VECTOR_CC vfloat2 dfsub_vf2_vf2_vf2(vfloat2 x, vfloat2 y) { + // |x| >= |y| + + vfloat s = vsub_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y)); + vfloat t = vsub_vf_vf_vf(vf2getx_vf_vf2(x), s); + t = vsub_vf_vf_vf(t, vf2getx_vf_vf2(y)); + t = vadd_vf_vf_vf(t, vf2gety_vf_vf2(x)); + return vf2setxy_vf2_vf_vf(s, vsub_vf_vf_vf(t, vf2gety_vf_vf2(y))); +} + +#ifdef ENABLE_FMA_SP +static INLINE CONST VECTOR_CC vfloat2 dfdiv_vf2_vf2_vf2(vfloat2 n, vfloat2 d) { + vfloat t = vrec_vf_vf(vf2getx_vf_vf2(d)); + vfloat s = vmul_vf_vf_vf(vf2getx_vf_vf2(n), t); + vfloat u = vfmapn_vf_vf_vf_vf(t, vf2getx_vf_vf2(n), s); + vfloat v = vfmanp_vf_vf_vf_vf(vf2gety_vf_vf2(d), t, vfmanp_vf_vf_vf_vf(vf2getx_vf_vf2(d), t, vcast_vf_f(1))); + return vf2setxy_vf2_vf_vf(s, vfma_vf_vf_vf_vf(s, v, vfma_vf_vf_vf_vf(vf2gety_vf_vf2(n), t, u))); +} + +static INLINE CONST VECTOR_CC vfloat2 dfmul_vf2_vf_vf(vfloat x, vfloat y) { + vfloat s = vmul_vf_vf_vf(x, y); + return vf2setxy_vf2_vf_vf(s, vfmapn_vf_vf_vf_vf(x, y, s)); +} + +static INLINE CONST VECTOR_CC vfloat2 dfsqu_vf2_vf2(vfloat2 x) { + vfloat s = vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x)); + return vf2setxy_vf2_vf_vf(s, vfma_vf_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x)), vf2gety_vf_vf2(x), vfmapn_vf_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x), s))); +} + +static INLINE CONST VECTOR_CC vfloat dfsqu_vf_vf2(vfloat2 x) { + return vfma_vf_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x), vadd_vf_vf_vf(vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x)), vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x)))); +} + +static INLINE CONST VECTOR_CC vfloat2 dfmul_vf2_vf2_vf2(vfloat2 x, vfloat2 y) { + vfloat s = vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y)); + return vf2setxy_vf2_vf_vf(s, vfma_vf_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(y), vfma_vf_vf_vf_vf(vf2gety_vf_vf2(x), vf2getx_vf_vf2(y), vfmapn_vf_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y), s)))); +} + +static INLINE CONST VECTOR_CC vfloat dfmul_vf_vf2_vf2(vfloat2 x, vfloat2 y) { + return vfma_vf_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y), vfma_vf_vf_vf_vf(vf2gety_vf_vf2(x), vf2getx_vf_vf2(y), vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(y)))); +} + +static INLINE CONST VECTOR_CC vfloat2 dfmul_vf2_vf2_vf(vfloat2 x, vfloat y) { + 
vfloat s = vmul_vf_vf_vf(vf2getx_vf_vf2(x), y); + return vf2setxy_vf2_vf_vf(s, vfma_vf_vf_vf_vf(vf2gety_vf_vf2(x), y, vfmapn_vf_vf_vf_vf(vf2getx_vf_vf2(x), y, s))); +} + +static INLINE CONST VECTOR_CC vfloat2 dfrec_vf2_vf(vfloat d) { + vfloat s = vrec_vf_vf(d); + return vf2setxy_vf2_vf_vf(s, vmul_vf_vf_vf(s, vfmanp_vf_vf_vf_vf(d, s, vcast_vf_f(1)))); +} + +static INLINE CONST VECTOR_CC vfloat2 dfrec_vf2_vf2(vfloat2 d) { + vfloat s = vrec_vf_vf(vf2getx_vf_vf2(d)); + return vf2setxy_vf2_vf_vf(s, vmul_vf_vf_vf(s, vfmanp_vf_vf_vf_vf(vf2gety_vf_vf2(d), s, vfmanp_vf_vf_vf_vf(vf2getx_vf_vf2(d), s, vcast_vf_f(1))))); +} +#else +static INLINE CONST VECTOR_CC vfloat2 dfdiv_vf2_vf2_vf2(vfloat2 n, vfloat2 d) { + vfloat t = vrec_vf_vf(vf2getx_vf_vf2(d)); + vfloat dh = vupper_vf_vf(vf2getx_vf_vf2(d)), dl = vsub_vf_vf_vf(vf2getx_vf_vf2(d), dh); + vfloat th = vupper_vf_vf(t ), tl = vsub_vf_vf_vf(t , th); + vfloat nhh = vupper_vf_vf(vf2getx_vf_vf2(n)), nhl = vsub_vf_vf_vf(vf2getx_vf_vf2(n), nhh); + + vfloat s = vmul_vf_vf_vf(vf2getx_vf_vf2(n), t); + + vfloat u, w; + w = vcast_vf_f(-1); + w = vmla_vf_vf_vf_vf(dh, th, w); + w = vmla_vf_vf_vf_vf(dh, tl, w); + w = vmla_vf_vf_vf_vf(dl, th, w); + w = vmla_vf_vf_vf_vf(dl, tl, w); + w = vneg_vf_vf(w); + + u = vmla_vf_vf_vf_vf(nhh, th, vneg_vf_vf(s)); + u = vmla_vf_vf_vf_vf(nhh, tl, u); + u = vmla_vf_vf_vf_vf(nhl, th, u); + u = vmla_vf_vf_vf_vf(nhl, tl, u); + u = vmla_vf_vf_vf_vf(s, w, u); + + return vf2setxy_vf2_vf_vf(s, vmla_vf_vf_vf_vf(t, vsub_vf_vf_vf(vf2gety_vf_vf2(n), vmul_vf_vf_vf(s, vf2gety_vf_vf2(d))), u)); +} + +static INLINE CONST VECTOR_CC vfloat2 dfmul_vf2_vf_vf(vfloat x, vfloat y) { + vfloat xh = vupper_vf_vf(x), xl = vsub_vf_vf_vf(x, xh); + vfloat yh = vupper_vf_vf(y), yl = vsub_vf_vf_vf(y, yh); + + vfloat s = vmul_vf_vf_vf(x, y), t; + + t = vmla_vf_vf_vf_vf(xh, yh, vneg_vf_vf(s)); + t = vmla_vf_vf_vf_vf(xl, yh, t); + t = vmla_vf_vf_vf_vf(xh, yl, t); + t = vmla_vf_vf_vf_vf(xl, yl, t); + + return vf2setxy_vf2_vf_vf(s, t); +} + +static INLINE CONST VECTOR_CC vfloat2 dfmul_vf2_vf2_vf(vfloat2 x, vfloat y) { + vfloat xh = vupper_vf_vf(vf2getx_vf_vf2(x)), xl = vsub_vf_vf_vf(vf2getx_vf_vf2(x), xh); + vfloat yh = vupper_vf_vf(y ), yl = vsub_vf_vf_vf(y , yh); + + vfloat s = vmul_vf_vf_vf(vf2getx_vf_vf2(x), y), t; + + t = vmla_vf_vf_vf_vf(xh, yh, vneg_vf_vf(s)); + t = vmla_vf_vf_vf_vf(xl, yh, t); + t = vmla_vf_vf_vf_vf(xh, yl, t); + t = vmla_vf_vf_vf_vf(xl, yl, t); + t = vmla_vf_vf_vf_vf(vf2gety_vf_vf2(x), y, t); + + return vf2setxy_vf2_vf_vf(s, t); +} + +static INLINE CONST VECTOR_CC vfloat2 dfmul_vf2_vf2_vf2(vfloat2 x, vfloat2 y) { + vfloat xh = vupper_vf_vf(vf2getx_vf_vf2(x)), xl = vsub_vf_vf_vf(vf2getx_vf_vf2(x), xh); + vfloat yh = vupper_vf_vf(vf2getx_vf_vf2(y)), yl = vsub_vf_vf_vf(vf2getx_vf_vf2(y), yh); + + vfloat s = vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y)), t; + + t = vmla_vf_vf_vf_vf(xh, yh, vneg_vf_vf(s)); + t = vmla_vf_vf_vf_vf(xl, yh, t); + t = vmla_vf_vf_vf_vf(xh, yl, t); + t = vmla_vf_vf_vf_vf(xl, yl, t); + t = vmla_vf_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(y), t); + t = vmla_vf_vf_vf_vf(vf2gety_vf_vf2(x), vf2getx_vf_vf2(y), t); + + return vf2setxy_vf2_vf_vf(s, t); +} + +static INLINE CONST VECTOR_CC vfloat dfmul_vf_vf2_vf2(vfloat2 x, vfloat2 y) { + vfloat xh = vupper_vf_vf(vf2getx_vf_vf2(x)), xl = vsub_vf_vf_vf(vf2getx_vf_vf2(x), xh); + vfloat yh = vupper_vf_vf(vf2getx_vf_vf2(y)), yl = vsub_vf_vf_vf(vf2getx_vf_vf2(y), yh); + + return vadd_vf_6vf(vmul_vf_vf_vf(vf2gety_vf_vf2(x), yh), vmul_vf_vf_vf(xh, vf2gety_vf_vf2(y)), 
vmul_vf_vf_vf(xl, yl), vmul_vf_vf_vf(xh, yl), vmul_vf_vf_vf(xl, yh), vmul_vf_vf_vf(xh, yh)); +} + +static INLINE CONST VECTOR_CC vfloat2 dfsqu_vf2_vf2(vfloat2 x) { + vfloat xh = vupper_vf_vf(vf2getx_vf_vf2(x)), xl = vsub_vf_vf_vf(vf2getx_vf_vf2(x), xh); + + vfloat s = vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x)), t; + + t = vmla_vf_vf_vf_vf(xh, xh, vneg_vf_vf(s)); + t = vmla_vf_vf_vf_vf(vadd_vf_vf_vf(xh, xh), xl, t); + t = vmla_vf_vf_vf_vf(xl, xl, t); + t = vmla_vf_vf_vf_vf(vf2getx_vf_vf2(x), vadd_vf_vf_vf(vf2gety_vf_vf2(x), vf2gety_vf_vf2(x)), t); + + return vf2setxy_vf2_vf_vf(s, t); +} + +static INLINE CONST VECTOR_CC vfloat dfsqu_vf_vf2(vfloat2 x) { + vfloat xh = vupper_vf_vf(vf2getx_vf_vf2(x)), xl = vsub_vf_vf_vf(vf2getx_vf_vf2(x), xh); + + return vadd_vf_5vf(vmul_vf_vf_vf(xh, vf2gety_vf_vf2(x)), vmul_vf_vf_vf(xh, vf2gety_vf_vf2(x)), vmul_vf_vf_vf(xl, xl), vadd_vf_vf_vf(vmul_vf_vf_vf(xh, xl), vmul_vf_vf_vf(xh, xl)), vmul_vf_vf_vf(xh, xh)); +} + +static INLINE CONST VECTOR_CC vfloat2 dfrec_vf2_vf(vfloat d) { + vfloat t = vrec_vf_vf(d); + vfloat dh = vupper_vf_vf(d), dl = vsub_vf_vf_vf(d, dh); + vfloat th = vupper_vf_vf(t), tl = vsub_vf_vf_vf(t, th); + + vfloat u = vcast_vf_f(-1); + u = vmla_vf_vf_vf_vf(dh, th, u); + u = vmla_vf_vf_vf_vf(dh, tl, u); + u = vmla_vf_vf_vf_vf(dl, th, u); + u = vmla_vf_vf_vf_vf(dl, tl, u); + + return vf2setxy_vf2_vf_vf(t, vmul_vf_vf_vf(vneg_vf_vf(t), u)); +} + +static INLINE CONST VECTOR_CC vfloat2 dfrec_vf2_vf2(vfloat2 d) { + vfloat t = vrec_vf_vf(vf2getx_vf_vf2(d)); + vfloat dh = vupper_vf_vf(vf2getx_vf_vf2(d)), dl = vsub_vf_vf_vf(vf2getx_vf_vf2(d), dh); + vfloat th = vupper_vf_vf(t ), tl = vsub_vf_vf_vf(t , th); + + vfloat u = vcast_vf_f(-1); + u = vmla_vf_vf_vf_vf(dh, th, u); + u = vmla_vf_vf_vf_vf(dh, tl, u); + u = vmla_vf_vf_vf_vf(dl, th, u); + u = vmla_vf_vf_vf_vf(dl, tl, u); + u = vmla_vf_vf_vf_vf(vf2gety_vf_vf2(d), t, u); + + return vf2setxy_vf2_vf_vf(t, vmul_vf_vf_vf(vneg_vf_vf(t), u)); +} +#endif + +static INLINE CONST VECTOR_CC vfloat2 dfsqrt_vf2_vf2(vfloat2 d) { +#ifdef ENABLE_RECSQRT_SP + vfloat x = vrecsqrt_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d))); + vfloat2 r = dfmul_vf2_vf2_vf(d, x); + return dfscale_vf2_vf2_vf(dfmul_vf2_vf2_vf2(r, dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf(r, x), vcast_vf_f(-3.0))), vcast_vf_f(-0.5)); +#else + vfloat t = vsqrt_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d))); + return dfscale_vf2_vf2_vf(dfmul_vf2_vf2_vf2(dfadd2_vf2_vf2_vf2(d, dfmul_vf2_vf_vf(t, t)), dfrec_vf2_vf(t)), vcast_vf_f(0.5)); +#endif +} + +static INLINE CONST VECTOR_CC vfloat2 dfsqrt_vf2_vf(vfloat d) { + vfloat t = vsqrt_vf_vf(d); + return dfscale_vf2_vf2_vf(dfmul_vf2_vf2_vf2(dfadd2_vf2_vf_vf2(d, dfmul_vf2_vf_vf(t, t)), dfrec_vf2_vf(t)), vcast_vf_f(0.5f)); +} diff --git a/src/estrin.h b/src/estrin.h new file mode 100644 index 00000000..a7942329 --- /dev/null +++ b/src/estrin.h @@ -0,0 +1,36 @@ +// Copyright Naoki Shibata and contributors 2010 - 2020. +// Distributed under the Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+// These are macros for evaluating polynomials using Estrin's method
+
+#define POLY2(x, c1, c0) MLA(x, C2V(c1), C2V(c0))
+#define POLY3(x, x2, c2, c1, c0) MLA(x2, C2V(c2), MLA(x, C2V(c1), C2V(c0)))
+#define POLY4(x, x2, c3, c2, c1, c0) MLA(x2, MLA(x, C2V(c3), C2V(c2)), MLA(x, C2V(c1), C2V(c0)))
+#define POLY5(x, x2, x4, c4, c3, c2, c1, c0) MLA(x4, C2V(c4), POLY4(x, x2, c3, c2, c1, c0))
+#define POLY6(x, x2, x4, c5, c4, c3, c2, c1, c0) MLA(x4, POLY2(x, c5, c4), POLY4(x, x2, c3, c2, c1, c0))
+#define POLY7(x, x2, x4, c6, c5, c4, c3, c2, c1, c0) MLA(x4, POLY3(x, x2, c6, c5, c4), POLY4(x, x2, c3, c2, c1, c0))
+#define POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0) MLA(x4, POLY4(x, x2, c7, c6, c5, c4), POLY4(x, x2, c3, c2, c1, c0))
+#define POLY9(x, x2, x4, x8, c8, c7, c6, c5, c4, c3, c2, c1, c0)\
+  MLA(x8, C2V(c8), POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0))
+#define POLY10(x, x2, x4, x8, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\
+  MLA(x8, POLY2(x, c9, c8), POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0))
+#define POLY11(x, x2, x4, x8, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\
+  MLA(x8, POLY3(x, x2, ca, c9, c8), POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0))
+#define POLY12(x, x2, x4, x8, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\
+  MLA(x8, POLY4(x, x2, cb, ca, c9, c8), POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0))
+#define POLY13(x, x2, x4, x8, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\
+  MLA(x8, POLY5(x, x2, x4, cc, cb, ca, c9, c8), POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0))
+#define POLY14(x, x2, x4, x8, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\
+  MLA(x8, POLY6(x, x2, x4, cd, cc, cb, ca, c9, c8), POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0))
+#define POLY15(x, x2, x4, x8, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\
+  MLA(x8, POLY7(x, x2, x4, ce, cd, cc, cb, ca, c9, c8), POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0))
+#define POLY16(x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\
+  MLA(x8, POLY8(x, x2, x4, cf, ce, cd, cc, cb, ca, c9, c8), POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0))
+#define POLY17(x, x2, x4, x8, x16, d0, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\
+  MLA(x16, C2V(d0), POLY16(x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0))
+#define POLY18(x, x2, x4, x8, x16, d1, d0, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\
+  MLA(x16, POLY2(x, d1, d0), POLY16(x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0))
+#define POLY19(x, x2, x4, x8, x16, d2, d1, d0, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\
+  MLA(x16, POLY3(x, x2, d2, d1, d0), POLY16(x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0))
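A short aside on the macros above, assuming (as in the SLEEF translation units that include this header) that MLA(a, b, c) computes a*b + c and C2V(c) broadcasts the scalar coefficient c:

    /* Worked expansion of POLY4 (illustration only, not part of the patch):
         POLY4(x, x2, c3, c2, c1, c0)
           = (c3*x + c2) * x2 + (c1*x + c0)      with x2 = x*x
           = c3*x^3 + c2*x^2 + c1*x + c0
       The two halves depend only on x and x2, so they can be evaluated in
       parallel; the dependency depth grows as O(log n) rather than the O(n)
       of Horner's rule. */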
diff --git a/src/gpu.cpp b/src/gpu.cpp new file mode 100644 index 00000000..446ad099 --- /dev/null +++ b/src/gpu.cpp @@ -0,0 +1,91 @@ +/*
+
+Copyright (c) 2021 Agenium Scale
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+*/
+
+#define NSIMD_INSIDE
+#include <nsimd/nsimd.h>
+
+#if defined(NSIMD_ONEAPI) && NSIMD_CXX > 0
+
+// ----------------------------------------------------------------------------
+// oneAPI
+
+// NSIMD error handler
+namespace nsimd {
+namespace oneapi {
+template <typename Exception = sycl::exception>
+struct sycl_async_error_handler {
+  void operator()(const sycl::exception_list &elist) {
+    for (const auto &exc : elist) {
+      try {
+        std::rethrow_exception(exc);
+      } catch (const Exception &exc) {
+        fprintf(stderr, "NSIMD Internal error:\n\tError: %s %s %d\n",
+                exc.what(), __FILE__, __LINE__);
+        exit(EXIT_FAILURE);
+      }
+    }
+  }
+};
+} // namespace oneapi
+} // namespace nsimd
+
+extern "C" {
+
+// Singleton to get default oneAPI queue
+NSIMD_DLLSPEC void *nsimd_oneapi_default_queue() {
+  static sycl::queue ret(sycl::default_selector{},
+                         nsimd::oneapi::sycl_async_error_handler<>{});
+  return (void *)&ret;
+}
+
+NSIMD_DLLSPEC nsimd_nat nsimd_kernel_param(nsimd_nat nb_items,
+                                           nsimd_nat block_size) {
+  return block_size * ((nb_items + block_size - 1) / block_size);
+}
+
+} // extern "C"
+
+#elif defined(NSIMD_CUDA) || defined(NSIMD_ROCM)
+
+// ----------------------------------------------------------------------------
+// CUDA/ROCm
+
+NSIMD_DLLSPEC nsimd_nat nsimd_kernel_param(nsimd_nat nb_items,
+                                           nsimd_nat block_size) {
+  return (nb_items + block_size - 1) / block_size;
+}
+
+#else
+
+// ----------------------------------------------------------------------------
+// CPU/SIMD
+
+NSIMD_DLLSPEC nsimd_nat nsimd_kernel_param(nsimd_nat nb_items,
+                                           nsimd_nat block_size) {
+  return nb_items / block_size;
+}
+
+// ----------------------------------------------------------------------------
+
+#endif
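The three nsimd_kernel_param variants above return different quantities: a block count for CUDA/ROCm, a padded global work-item count for oneAPI, and a truncated quotient on CPU. A worked example with hypothetical numbers:

    /* nb_items = 1000, block_size = 128 (hypothetical values):
         CUDA/ROCm: (1000 + 128 - 1) / 128     = 8     -> 8 blocks of 128 threads
         oneAPI:    128 * ((1000 + 127) / 128) = 1024  -> padded global size
       Both launch 1024 threads for 1000 elements, so kernels are presumably
       expected to guard with `if (i < nb_items)`. */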
diff --git a/src/helperadvsimd.h b/src/helperadvsimd.h new file mode 100644 index 00000000..4e333e6b --- /dev/null +++ b/src/helperadvsimd.h @@ -0,0 +1,845 @@ +/*********************************************************************/
+/* Copyright ARM Ltd. 2010 - 2019. */
+/* Distributed under the Boost Software License, Version 1.0. */
+/* (See accompanying file LICENSE.txt or copy at */
+/* http://www.boost.org/LICENSE_1_0.txt) */
+/*********************************************************************/
+
+#ifndef __ARM_NEON
+#error Please specify advsimd flags.
+#endif
+
+#if !defined(SLEEF_GENHEADER)
+#include <arm_neon.h>
+#include <stdint.h>
+
+#include "misc.h"
+#endif // #if !defined(SLEEF_GENHEADER)
+
+#define ENABLE_DP
+//@#define ENABLE_DP
+#define LOG2VECTLENDP 1
+//@#define LOG2VECTLENDP 1
+#define VECTLENDP (1 << LOG2VECTLENDP)
+//@#define VECTLENDP (1 << LOG2VECTLENDP)
+
+#define ENABLE_SP
+//@#define ENABLE_SP
+#define LOG2VECTLENSP 2
+//@#define LOG2VECTLENSP 2
+#define VECTLENSP (1 << LOG2VECTLENSP)
+//@#define VECTLENSP (1 << LOG2VECTLENSP)
+
+#if CONFIG == 1
+#define ENABLE_FMA_DP
+//@#define ENABLE_FMA_DP
+#define ENABLE_FMA_SP
+//@#define ENABLE_FMA_SP
+#endif
+
+#define FULL_FP_ROUNDING
+//@#define FULL_FP_ROUNDING
+#define ACCURATE_SQRT
+//@#define ACCURATE_SQRT
+
+#define ISANAME "AArch64 AdvSIMD"
+
+// Mask definition
+typedef uint32x4_t vmask;
+typedef uint32x4_t vopmask;
+
+// Single precision definitions
+typedef float32x4_t vfloat;
+typedef int32x4_t vint2;
+
+// Double precision definitions
+typedef float64x2_t vdouble;
+typedef int32x2_t vint;
+
+typedef struct {
+  vmask x, y;
+} vmask2;
+
+#define DFTPRIORITY 10
+
+static INLINE int vavailability_i(int name) { return 3; }
+static INLINE void vprefetch_v_p(const void *ptr) { }
+
+static INLINE VECTOR_CC int vtestallones_i_vo32(vopmask g) {
+  uint32x2_t x0 = vand_u32(vget_low_u32(g), vget_high_u32(g));
+  uint32x2_t x1 = vpmin_u32(x0, x0);
+  return vget_lane_u32(x1, 0);
+}
+
+static INLINE VECTOR_CC int vtestallones_i_vo64(vopmask g) {
+  uint32x2_t x0 = vand_u32(vget_low_u32(g), vget_high_u32(g));
+  uint32x2_t x1 = vpmin_u32(x0, x0);
+  return vget_lane_u32(x1, 0);
+}
+
+// Vector load / store
+static INLINE VECTOR_CC vdouble vload_vd_p(const double *ptr) { return vld1q_f64(ptr); }
+static INLINE VECTOR_CC vdouble vloadu_vd_p(const double *ptr) { return vld1q_f64(ptr); }
+static INLINE VECTOR_CC void vstore_v_p_vd(double *ptr, vdouble v) { vst1q_f64(ptr, v); }
+static INLINE VECTOR_CC void vstoreu_v_p_vd(double *ptr, vdouble v) { vst1q_f64(ptr, v); }
+static INLINE VECTOR_CC vfloat vload_vf_p(const float *ptr) { return vld1q_f32(ptr); }
+static INLINE VECTOR_CC vfloat vloadu_vf_p(const float *ptr) { return vld1q_f32(ptr); }
+static INLINE VECTOR_CC void vstore_v_p_vf(float *ptr, vfloat v) { vst1q_f32(ptr, v); }
+static INLINE VECTOR_CC void vstoreu_v_p_vf(float *ptr, vfloat v) { vst1q_f32(ptr, v); }
+static INLINE VECTOR_CC vint2 vloadu_vi2_p(int32_t *p) { return vld1q_s32(p); }
+static INLINE VECTOR_CC void vstoreu_v_p_vi2(int32_t *p, vint2 v) { vst1q_s32(p, v); }
+static INLINE VECTOR_CC vint vloadu_vi_p(int32_t *p) { return vld1_s32(p); }
+static INLINE VECTOR_CC void vstoreu_v_p_vi(int32_t *p, vint v) { vst1_s32(p, v); }
+
+static INLINE VECTOR_CC vdouble vgather_vd_p_vi(const double *ptr, vint vi) {
+  return ((vdouble) { ptr[vget_lane_s32(vi, 0)], ptr[vget_lane_s32(vi, 1)]} );
+}
+
+static INLINE VECTOR_CC vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) {
+  return ((vfloat) {
+      ptr[vgetq_lane_s32(vi2, 0)],
+      ptr[vgetq_lane_s32(vi2, 1)],
+      ptr[vgetq_lane_s32(vi2, 2)],
+      ptr[vgetq_lane_s32(vi2, 3)]
+    });
+}
+
+// Basic logical operations for mask
+static INLINE VECTOR_CC vmask vand_vm_vm_vm(vmask x, vmask y) { return vandq_u32(x, y); }
+static INLINE VECTOR_CC vmask vandnot_vm_vm_vm(vmask x, vmask y) {
+  return vbicq_u32(y, x);
+}
+static INLINE VECTOR_CC vmask vor_vm_vm_vm(vmask x, vmask y) { return vorrq_u32(x, y); }
+static INLINE VECTOR_CC vmask vxor_vm_vm_vm(vmask x, vmask y) { return veorq_u32(x, y); }
+
+// Mask <--> single precision reinterpret
+static INLINE VECTOR_CC vmask
vreinterpret_vm_vf(vfloat vf) { + return vreinterpretq_u32_f32(vf); +} +static INLINE VECTOR_CC vfloat vreinterpret_vf_vm(vmask vm) { + return vreinterpretq_f32_u32(vm); +} +static INLINE VECTOR_CC vint2 vcast_vi2_vm(vmask vm) { return vreinterpretq_s32_u32(vm); } +static INLINE VECTOR_CC vmask vcast_vm_vi2(vint2 vi) { return vreinterpretq_u32_s32(vi); } + +// Mask <--> double precision reinterpret +static INLINE VECTOR_CC vmask vreinterpret_vm_vd(vdouble vd) { + return vreinterpretq_u32_f64(vd); +} +static INLINE VECTOR_CC vdouble vreinterpret_vd_vm(vmask vm) { + return vreinterpretq_f64_u32(vm); +} +static INLINE VECTOR_CC vfloat vreinterpret_vf_vi2(vint2 vm) { + return vreinterpretq_f32_s32(vm); +} +static INLINE VECTOR_CC vint2 vreinterpret_vi2_vf(vfloat vf) { + return vreinterpretq_s32_f32(vf); +} +static INLINE VECTOR_CC vint2 vreinterpret_vi2_vd(vdouble vd) { + return vreinterpretq_s32_f64(vd); +} + +/****************************************/ +/* Single precision FP operations */ +/****************************************/ +// Broadcast +static INLINE VECTOR_CC vfloat vcast_vf_f(float f) { return vdupq_n_f32(f); } + +// Add, Sub, Mul +static INLINE VECTOR_CC vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { + return vaddq_f32(x, y); +} +static INLINE VECTOR_CC vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { + return vsubq_f32(x, y); +} +static INLINE VECTOR_CC vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { + return vmulq_f32(x, y); +} + +// |x|, -x +static INLINE VECTOR_CC vfloat vabs_vf_vf(vfloat f) { return vabsq_f32(f); } +static INLINE VECTOR_CC vfloat vneg_vf_vf(vfloat f) { return vnegq_f32(f); } + +#if CONFIG == 1 +// Multiply accumulate: z = z + x * y +static INLINE VECTOR_CC vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { + return vfmaq_f32(z, x, y); +} +// Multiply subtract: z = z - x * y +static INLINE VECTOR_CC vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { + return vfmsq_f32(z, x, y); +} +// Multiply subtract: z = x * y - z +static INLINE VECTOR_CC vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { + return vneg_vf_vf(vfmsq_f32(z, x, y)); +} +#else +static INLINE VECTOR_CC vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } +static INLINE VECTOR_CC vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); } +static INLINE VECTOR_CC vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } +#endif + +static INLINE VECTOR_CC vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // z + x * y + return vfmaq_f32(z, x, y); +} + +static INLINE VECTOR_CC vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // z - x * y + return vfmsq_f32(z, x, y); +} + +static INLINE VECTOR_CC vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // x * y - z + return vneg_vf_vf(vfmanp_vf_vf_vf_vf(x, y, z)); +} + +// Reciprocal 1/x, Division, Square root +static INLINE VECTOR_CC vfloat vdiv_vf_vf_vf(vfloat n, vfloat d) { +#ifndef ENABLE_ALTDIV + return vdivq_f32(n, d); +#else + // Finite numbers (including denormal) only, gives mostly correctly rounded result + float32x4_t t, u, x, y; + uint32x4_t i0, i1; + i0 = vandq_u32(vreinterpretq_u32_f32(n), vdupq_n_u32(0x7c000000)); + i1 = vandq_u32(vreinterpretq_u32_f32(d), vdupq_n_u32(0x7c000000)); + i0 = vsubq_u32(vdupq_n_u32(0x7d000000), vshrq_n_u32(vaddq_u32(i0, i1), 1)); + t = vreinterpretq_f32_u32(i0); + y = vmulq_f32(d, t); + x = vmulq_f32(n, t); + t = vrecpeq_f32(y); + t = 
vmulq_f32(t, vrecpsq_f32(y, t)); + t = vmulq_f32(t, vrecpsq_f32(y, t)); + u = vmulq_f32(x, t); + u = vfmaq_f32(u, vfmsq_f32(x, y, u), t); + return u; +#endif +} +static INLINE VECTOR_CC vfloat vrec_vf_vf(vfloat d) { +#ifndef ENABLE_ALTDIV + return vdiv_vf_vf_vf(vcast_vf_f(1.0f), d); +#else + return vbslq_f32(vceqq_f32(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)), + vcast_vf_f(0), vdiv_vf_vf_vf(vcast_vf_f(1.0f), d)); +#endif +} + +static INLINE VECTOR_CC vfloat vsqrt_vf_vf(vfloat d) { +#ifndef ENABLE_ALTSQRT + return vsqrtq_f32(d); +#else + // Gives correctly rounded result for all input range + vfloat w, x, y, z; + + y = vrsqrteq_f32(d); + x = vmul_vf_vf_vf(d, y); w = vmul_vf_vf_vf(vcast_vf_f(0.5), y); + y = vfmanp_vf_vf_vf_vf(x, w, vcast_vf_f(0.5)); + x = vfma_vf_vf_vf_vf(x, y, x); w = vfma_vf_vf_vf_vf(w, y, w); + + y = vfmanp_vf_vf_vf_vf(x, w, vcast_vf_f(1.5)); w = vadd_vf_vf_vf(w, w); + w = vmul_vf_vf_vf(w, y); + x = vmul_vf_vf_vf(w, d); + y = vfmapn_vf_vf_vf_vf(w, d, x); z = vfmanp_vf_vf_vf_vf(w, x, vcast_vf_f(1)); + z = vfmanp_vf_vf_vf_vf(w, y, z); w = vmul_vf_vf_vf(vcast_vf_f(0.5), x); + w = vfma_vf_vf_vf_vf(w, z, y); + w = vadd_vf_vf_vf(w, x); + + return vbslq_f32(vorrq_u32(vceqq_f32(d, vcast_vf_f(0)), + vceqq_f32(d, vcast_vf_f(SLEEF_INFINITYf))), d, w); +#endif +} + +// max, min +static INLINE VECTOR_CC vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { + return vmaxq_f32(x, y); +} +static INLINE VECTOR_CC vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { + return vminq_f32(x, y); +} + +// Comparisons +static INLINE VECTOR_CC vmask veq_vm_vf_vf(vfloat x, vfloat y) { return vceqq_f32(x, y); } +static INLINE VECTOR_CC vmask vneq_vm_vf_vf(vfloat x, vfloat y) { + return vmvnq_u32(vceqq_f32(x, y)); +} +static INLINE VECTOR_CC vmask vlt_vm_vf_vf(vfloat x, vfloat y) { return vcltq_f32(x, y); } +static INLINE VECTOR_CC vmask vle_vm_vf_vf(vfloat x, vfloat y) { return vcleq_f32(x, y); } +static INLINE VECTOR_CC vmask vgt_vm_vf_vf(vfloat x, vfloat y) { return vcgtq_f32(x, y); } +static INLINE VECTOR_CC vmask vge_vm_vf_vf(vfloat x, vfloat y) { return vcgeq_f32(x, y); } + +// Conditional select +static INLINE VECTOR_CC vfloat vsel_vf_vm_vf_vf(vmask mask, vfloat x, vfloat y) { + return vbslq_f32(mask, x, y); +} + +// int <--> float conversions +static INLINE VECTOR_CC vint2 vtruncate_vi2_vf(vfloat vf) { return vcvtq_s32_f32(vf); } +static INLINE VECTOR_CC vfloat vcast_vf_vi2(vint2 vi) { return vcvtq_f32_s32(vi); } +static INLINE VECTOR_CC vint2 vcast_vi2_i(int i) { return vdupq_n_s32(i); } +static INLINE VECTOR_CC vint2 vrint_vi2_vf(vfloat d) { + return vcvtq_s32_f32(vrndnq_f32(d)); +} + +/***************************************/ +/* Single precision integer operations */ +/***************************************/ + +// Add, Sub, Neg (-x) +static INLINE VECTOR_CC vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { + return vaddq_s32(x, y); +} +static INLINE VECTOR_CC vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { + return vsubq_s32(x, y); +} +static INLINE VECTOR_CC vint2 vneg_vi2_vi2(vint2 e) { return vnegq_s32(e); } + +// Logical operations +static INLINE VECTOR_CC vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { + return vandq_s32(x, y); +} +static INLINE VECTOR_CC vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { + return vbicq_s32(y, x); +} +static INLINE VECTOR_CC vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { + return vorrq_s32(x, y); +} +static INLINE VECTOR_CC vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { + return veorq_s32(x, y); +} + +// Shifts +#define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c) +//@#define vsll_vi2_vi2_i(x, c) 
vshlq_n_s32(x, c) +#define vsrl_vi2_vi2_i(x, c) \ + vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c)) +//@#define vsrl_vi2_vi2_i(x, c) vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c)) + +#define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c) +//@#define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c) +#define vsra_vi_vi_i(x, c) vshr_n_s32(x, c) +//@#define vsra_vi_vi_i(x, c) vshr_n_s32(x, c) +#define vsll_vi_vi_i(x, c) vshl_n_s32(x, c) +//@#define vsll_vi_vi_i(x, c) vshl_n_s32(x, c) +#define vsrl_vi_vi_i(x, c) \ + vreinterpret_s32_u32(vshr_n_u32(vreinterpret_u32_s32(x), c)) +//@#define vsrl_vi_vi_i(x, c) vreinterpret_s32_u32(vshr_n_u32(vreinterpret_u32_s32(x), c)) + +// Comparison returning masks +static INLINE VECTOR_CC vmask veq_vm_vi2_vi2(vint2 x, vint2 y) { return vceqq_s32(x, y); } +static INLINE VECTOR_CC vmask vgt_vm_vi2_vi2(vint2 x, vint2 y) { return vcgeq_s32(x, y); } +// Comparison returning integers +static INLINE VECTOR_CC vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { + return vreinterpretq_s32_u32(vcgeq_s32(x, y)); +} +static INLINE VECTOR_CC vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { + return vreinterpretq_s32_u32(vceqq_s32(x, y)); +} + +// Conditional select +static INLINE VECTOR_CC vint2 vsel_vi2_vm_vi2_vi2(vmask m, vint2 x, vint2 y) { + return vbslq_s32(m, x, y); +} + +/* -------------------------------------------------------------------------- */ +/* -------------------------------------------------------------------------- */ +/* -------------------------------------------------------------------------- */ +/* -------------------------------------------------------------------------- */ + +/****************************************/ +/* Double precision FP operations */ +/****************************************/ +// Broadcast +static INLINE VECTOR_CC vdouble vcast_vd_d(double f) { return vdupq_n_f64(f); } + +// Add, Sub, Mul +static INLINE VECTOR_CC vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { + return vaddq_f64(x, y); +} +static INLINE VECTOR_CC vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { + return vsubq_f64(x, y); +} +static INLINE VECTOR_CC vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { + return vmulq_f64(x, y); +} + +// |x|, -x +static INLINE VECTOR_CC vdouble vabs_vd_vd(vdouble f) { return vabsq_f64(f); } +static INLINE VECTOR_CC vdouble vneg_vd_vd(vdouble f) { return vnegq_f64(f); } + +// max, min +static INLINE VECTOR_CC vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { + return vmaxq_f64(x, y); +} +static INLINE VECTOR_CC vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { + return vminq_f64(x, y); +} + +#if CONFIG == 1 +// Multiply accumulate: z = z + x * y +static INLINE VECTOR_CC vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return vfmaq_f64(z, x, y); +} + +static INLINE VECTOR_CC vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return vfmsq_f64(z, x, y); +} + +//[z = x * y - z] +static INLINE VECTOR_CC vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return vneg_vd_vd(vfmsq_f64(z, x, y)); +} +#else +static INLINE VECTOR_CC vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } +static INLINE VECTOR_CC vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } +#endif + +static INLINE VECTOR_CC vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // z + x * y + return vfmaq_f64(z, x, y); +} + +static INLINE VECTOR_CC vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // z - x * y + return 
vfmsq_f64(z, x, y); +} + +static INLINE VECTOR_CC vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // x * y - z + return vneg_vd_vd(vfmanp_vd_vd_vd_vd(x, y, z)); +} + +// Reciprocal 1/x, Division, Square root +static INLINE VECTOR_CC vdouble vdiv_vd_vd_vd(vdouble n, vdouble d) { +#ifndef ENABLE_ALTDIV + return vdivq_f64(n, d); +#else + // Finite numbers (including denormal) only, gives mostly correctly rounded result + float64x2_t t, u, x, y; + uint64x2_t i0, i1; + i0 = vandq_u64(vreinterpretq_u64_f64(n), vdupq_n_u64(0x7fc0000000000000L)); + i1 = vandq_u64(vreinterpretq_u64_f64(d), vdupq_n_u64(0x7fc0000000000000L)); + i0 = vsubq_u64(vdupq_n_u64(0x7fd0000000000000L), vshrq_n_u64(vaddq_u64(i0, i1), 1)); + t = vreinterpretq_f64_u64(i0); + y = vmulq_f64(d, t); + x = vmulq_f64(n, t); + t = vrecpeq_f64(y); + t = vmulq_f64(t, vrecpsq_f64(y, t)); + t = vmulq_f64(t, vrecpsq_f64(y, t)); + t = vmulq_f64(t, vrecpsq_f64(y, t)); + u = vmulq_f64(x, t); + u = vfmaq_f64(u, vfmsq_f64(x, y, u), t); + return u; +#endif +} +static INLINE VECTOR_CC vdouble vrec_vd_vd(vdouble d) { +#ifndef ENABLE_ALTDIV + return vdiv_vd_vd_vd(vcast_vd_d(1.0f), d); +#else + return vbslq_f64(vceqq_f64(vabs_vd_vd(d), vcast_vd_d(SLEEF_INFINITY)), + vcast_vd_d(0), vdiv_vd_vd_vd(vcast_vd_d(1.0f), d)); +#endif +} + +static INLINE VECTOR_CC vdouble vsqrt_vd_vd(vdouble d) { +#ifndef ENABLE_ALTSQRT + return vsqrtq_f64(d); +#else + // Gives correctly rounded result for all input range + vdouble w, x, y, z; + + y = vrsqrteq_f64(d); + x = vmul_vd_vd_vd(d, y); w = vmul_vd_vd_vd(vcast_vd_d(0.5), y); + y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(0.5)); + x = vfma_vd_vd_vd_vd(x, y, x); w = vfma_vd_vd_vd_vd(w, y, w); + y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(0.5)); + x = vfma_vd_vd_vd_vd(x, y, x); w = vfma_vd_vd_vd_vd(w, y, w); + + y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(1.5)); w = vadd_vd_vd_vd(w, w); + w = vmul_vd_vd_vd(w, y); + x = vmul_vd_vd_vd(w, d); + y = vfmapn_vd_vd_vd_vd(w, d, x); z = vfmanp_vd_vd_vd_vd(w, x, vcast_vd_d(1)); + z = vfmanp_vd_vd_vd_vd(w, y, z); w = vmul_vd_vd_vd(vcast_vd_d(0.5), x); + w = vfma_vd_vd_vd_vd(w, z, y); + w = vadd_vd_vd_vd(w, x); + + return vbslq_f64(vorrq_u64(vceqq_f64(d, vcast_vd_d(0)), + vceqq_f64(d, vcast_vd_d(SLEEF_INFINITY))), d, w); +#endif +} + +/* Comparisons */ +static INLINE VECTOR_CC vopmask veq_vo_vd_vd(vdouble x, vdouble y) { + return vreinterpretq_u32_u64(vceqq_f64(x, y)); +} +static INLINE VECTOR_CC vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { + return vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(x, y))); +} +static INLINE VECTOR_CC vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { + return vreinterpretq_u32_u64(vcltq_f64(x, y)); +} +static INLINE VECTOR_CC vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { + return vreinterpretq_u32_u64(vcgtq_f64(x, y)); +} +static INLINE VECTOR_CC vopmask vle_vo_vd_vd(vdouble x, vdouble y) { + return vreinterpretq_u32_u64(vcleq_f64(x, y)); +} +static INLINE VECTOR_CC vopmask vge_vo_vd_vd(vdouble x, vdouble y) { + return vreinterpretq_u32_u64(vcgeq_f64(x, y)); +} + +// Conditional select +static INLINE VECTOR_CC vdouble vsel_vd_vo_vd_vd(vopmask mask, vdouble x, vdouble y) { + return vbslq_f64(vreinterpretq_u64_u32(mask), x, y); +} + +#if 1 +static INLINE CONST VECTOR_CC vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { + return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0)); +} + +static INLINE VECTOR_CC vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { + return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), 
vsel_vd_vo_d_d(o1, d1, d2)); +} + +static INLINE VECTOR_CC vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { + return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3))); +} +#else +// This implementation is slower on the current CPU models (as of May 2017.) +// I(Naoki Shibata) expect that on future CPU models with hardware similar to Super Shuffle Engine, this implementation will be faster. +static INLINE CONST VECTOR_CC vdouble vsel_vd_vo_d_d(vopmask o, double d0, double d1) { + uint8x16_t idx = vbslq_u8(vreinterpretq_u8_u32(o), (uint8x16_t) { 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 }, + (uint8x16_t) { 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15 }); + + uint8x16_t tab = (uint8x16_t) (float64x2_t) { d0, d1 }; + return (vdouble) vqtbl1q_u8(tab, idx); +} + +static INLINE VECTOR_CC vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { + uint8x16_t idx = vbslq_u8(vreinterpretq_u8_u32(o0), (uint8x16_t) { 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 }, + vbslq_u8(vreinterpretq_u8_u32(o1), (uint8x16_t) { 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15 }, + vbslq_u8(vreinterpretq_u8_u32(o2), (uint8x16_t) { 16, 17, 18, 19, 20, 21, 22, 23, 16, 17, 18, 19, 20, 21, 22, 23 }, + (uint8x16_t) { 24, 25, 26, 27, 28, 29, 30, 31, 24, 25, 26, 27, 28, 29, 30, 31 }))); + + uint8x16x2_t tab = { { (uint8x16_t) (float64x2_t) { d0, d1 }, (uint8x16_t) (float64x2_t) { d2, d3 } } }; + return (vdouble) vqtbl2q_u8(tab, idx); +} + +static INLINE VECTOR_CC vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { + return vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o1, d0, d1, d2, d2); +} +#endif + +static INLINE VECTOR_CC vdouble vrint_vd_vd(vdouble d) { return vrndnq_f64(d); } +static INLINE VECTOR_CC vfloat vrint_vf_vf(vfloat d) { return vrndnq_f32(d); } + +/****************************************/ +/* int <--> float conversions */ +/****************************************/ +static INLINE VECTOR_CC vint vtruncate_vi_vd(vdouble vf) { + return vmovn_s64(vcvtq_s64_f64(vf)); +} +static INLINE VECTOR_CC vdouble vcast_vd_vi(vint vi) { + return vcvtq_f64_s64(vmovl_s32(vi)); +} +static INLINE VECTOR_CC vint vcast_vi_i(int i) { return vdup_n_s32(i); } +static INLINE VECTOR_CC vint vrint_vi_vd(vdouble d) { + return vqmovn_s64(vcvtq_s64_f64(vrndnq_f64(d))); +} + +/***************************************/ +/* Integer operations */ +/***************************************/ + +// Add, Sub, Neg (-x) +static INLINE VECTOR_CC vint vadd_vi_vi_vi(vint x, vint y) { return vadd_s32(x, y); } +static INLINE VECTOR_CC vint vsub_vi_vi_vi(vint x, vint y) { return vsub_s32(x, y); } +static INLINE VECTOR_CC vint vneg_vi_vi(vint e) { return vneg_s32(e); } + +// Logical operations +static INLINE VECTOR_CC vint vand_vi_vi_vi(vint x, vint y) { return vand_s32(x, y); } +static INLINE VECTOR_CC vint vandnot_vi_vi_vi(vint x, vint y) { return vbic_s32(y, x); } +static INLINE VECTOR_CC vint vor_vi_vi_vi(vint x, vint y) { return vorr_s32(x, y); } +static INLINE VECTOR_CC vint vxor_vi_vi_vi(vint x, vint y) { return veor_s32(x, y); } + +// Comparison returning masks +static INLINE VECTOR_CC vopmask veq_vo_vi_vi(vint x, vint y) { + return vcombine_u32(vceq_s32(x, y), vdup_n_u32(0)); +} + +// Conditional select +static INLINE VECTOR_CC vint vsel_vi_vm_vi_vi(vmask m, vint x, vint y) { + return vbsl_s32(vget_low_u32(m), x, y); +} + 
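The comparison helpers in this header return per-lane all-ones/all-zeros masks whose only consumers are the vsel_* helpers. A minimal sketch of that composition, using only functions defined above (vclampf itself is hypothetical and not part of the patch):

    // Branchless clamp built from the helpers above (illustration only).
    static INLINE VECTOR_CC vfloat vclampf(vfloat x, vfloat lo, vfloat hi) {
      // Lanes where x > lo keep x, the others take lo: max(x, lo).
      x = vsel_vf_vm_vf_vf(vgt_vm_vf_vf(x, lo), x, lo);
      // Lanes where x < hi keep x, the others take hi: min(x, hi).
      x = vsel_vf_vm_vf_vf(vlt_vm_vf_vf(x, hi), x, hi);
      return x;
    }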
+/***************************************/ +/* Predicates */ +/***************************************/ +static INLINE VECTOR_CC vopmask visinf_vo_vd(vdouble d) { + const float64x2_t inf = vdupq_n_f64(SLEEF_INFINITY); + const float64x2_t neg_inf = vdupq_n_f64(-SLEEF_INFINITY); + uint64x2_t cmp = vorrq_u64(vceqq_f64(d, inf), vceqq_f64(d, neg_inf)); + return vreinterpretq_u32_u64(cmp); +} + +static INLINE VECTOR_CC vopmask visnan_vo_vd(vdouble d) { + return vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(d, d))); +} + +static INLINE VECTOR_CC vopmask vispinf_vo_vd(vdouble d) { + return vreinterpretq_u32_u64(vceqq_f64(d, vdupq_n_f64(SLEEF_INFINITY))); +} + +static INLINE VECTOR_CC vopmask visminf_vo_vd(vdouble d) { + return vreinterpretq_u32_u64(vceqq_f64(d, vdupq_n_f64(-SLEEF_INFINITY))); +} + +static INLINE VECTOR_CC vfloat vsel_vf_vo_vf_vf(vopmask mask, vfloat x, vfloat y) { + return vbslq_f32(mask, x, y); +} + +static INLINE CONST VECTOR_CC vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { + return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); +} + +static INLINE VECTOR_CC vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { + return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); +} + +static INLINE VECTOR_CC vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { + return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); +} + +static INLINE VECTOR_CC vopmask veq_vo_vf_vf(vfloat x, vfloat y) { + return vceqq_f32(x, y); +} +static INLINE VECTOR_CC vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { + return vmvnq_u32(vceqq_f32(x, y)); +} +static INLINE VECTOR_CC vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { + return vcltq_f32(x, y); +} +static INLINE VECTOR_CC vopmask vle_vo_vf_vf(vfloat x, vfloat y) { + return vcleq_f32(x, y); +} +static INLINE VECTOR_CC vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { + return vcgtq_f32(x, y); +} +static INLINE VECTOR_CC vopmask vge_vo_vf_vf(vfloat x, vfloat y) { + return vcgeq_f32(x, y); +} + +static INLINE VECTOR_CC vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { + return vceqq_s32(x, y); +} +static INLINE VECTOR_CC vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { + return vcgtq_s32(x, y); +} +static INLINE VECTOR_CC vopmask vgt_vo_vi_vi(vint x, vint y) { + return vcombine_u32(vcgt_s32(x, y), vdup_n_u32(0)); +} +static INLINE VECTOR_CC vopmask visinf_vo_vf(vfloat d) { + return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); +} +static INLINE VECTOR_CC vopmask vispinf_vo_vf(vfloat d) { + return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); +} +static INLINE VECTOR_CC vopmask visminf_vo_vf(vfloat d) { + return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); +} +static INLINE VECTOR_CC vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } + +static INLINE VECTOR_CC vopmask vcast_vo32_vo64(vopmask m) { + return vuzpq_u32(m, m).val[0]; +} +static INLINE VECTOR_CC vopmask vcast_vo64_vo32(vopmask m) { + return vzipq_u32(m, m).val[0]; +} + +static INLINE VECTOR_CC vopmask vand_vo_vo_vo(vopmask x, vopmask y) { + return vandq_u32(x, y); +} +static INLINE VECTOR_CC vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { + return vbicq_u32(y, x); +} +static INLINE VECTOR_CC vopmask vor_vo_vo_vo(vopmask x, vopmask y) { + return vorrq_u32(x, y); +} +static INLINE VECTOR_CC vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { + return veorq_u32(x, y); +} + +static INLINE VECTOR_CC vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, 
vint2 y) { + return vbslq_s32(m, x, y); +} +static INLINE VECTOR_CC vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { + return vandq_s32(vreinterpretq_s32_u32(x), y); +} +static INLINE VECTOR_CC vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { + return vbicq_s32(y, vreinterpretq_s32_u32(x)); +} +static INLINE VECTOR_CC vint vandnot_vi_vo_vi(vopmask x, vint y) { + return vbic_s32(y, vget_low_s32(vreinterpretq_s32_u32(x))); +} +static INLINE VECTOR_CC vmask vand_vm_vo32_vm(vopmask x, vmask y) { + return vandq_u32(x, y); +} +static INLINE VECTOR_CC vmask vand_vm_vo64_vm(vopmask x, vmask y) { + return vandq_u32(x, y); +} +static INLINE VECTOR_CC vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { + return vbicq_u32(y, x); +} +static INLINE VECTOR_CC vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { + return vbicq_u32(y, x); +} +static INLINE VECTOR_CC vmask vor_vm_vo32_vm(vopmask x, vmask y) { + return vorrq_u32(x, y); +} +static INLINE VECTOR_CC vmask vor_vm_vo64_vm(vopmask x, vmask y) { + return vorrq_u32(x, y); +} +static INLINE VECTOR_CC vmask vxor_vm_vo32_vm(vopmask x, vmask y) { + return veorq_u32(x, y); +} + +static INLINE VECTOR_CC vfloat vtruncate_vf_vf(vfloat vd) { return vrndq_f32(vd); } + +static INLINE VECTOR_CC vmask vcast_vm_i_i(int i0, int i1) { + return vreinterpretq_u32_u64(vdupq_n_u64((0xffffffff & (uint64_t)i1) | (((uint64_t)i0) << 32))); +} + +static INLINE VECTOR_CC vopmask veq64_vo_vm_vm(vmask x, vmask y) { + return vreinterpretq_u32_u64(vceqq_s64(vreinterpretq_s64_u32(x), vreinterpretq_s64_u32(y))); +} + +static INLINE VECTOR_CC vmask vadd64_vm_vm_vm(vmask x, vmask y) { + return vreinterpretq_u32_s64(vaddq_s64(vreinterpretq_s64_u32(x), vreinterpretq_s64_u32(y))); +} + +static INLINE VECTOR_CC vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { + return vbsl_s32(vget_low_u32(m), x, y); +} + +// Logical operations +static INLINE VECTOR_CC vint vand_vi_vo_vi(vopmask x, vint y) { + return vand_s32(vreinterpret_s32_u32(vget_low_u32(x)), y); +} + +static INLINE VECTOR_CC vint2 vcastu_vi2_vi(vint vi) { + return vreinterpretq_s32_u32(vrev64q_u32(vreinterpretq_u32_u64(vmovl_u32(vreinterpret_u32_s32(vi))))); +} +static INLINE VECTOR_CC vint vcastu_vi_vi2(vint2 vi2) { + return vreinterpret_s32_u32(vmovn_u64(vreinterpretq_u64_u32(vrev64q_u32(vreinterpretq_u32_s32(vi2))))); +} +static INLINE VECTOR_CC vdouble vreinterpret_vd_vi2(vint2 vi) { + return vreinterpretq_f64_s32(vi); +} +static INLINE VECTOR_CC vdouble vtruncate_vd_vd(vdouble vd) { return vrndq_f64(vd); } + +// + +#define PNMASK ((vdouble) { +0.0, -0.0 }) +#define NPMASK ((vdouble) { -0.0, +0.0 }) +#define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f }) +#define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f }) + +static INLINE VECTOR_CC vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } +static INLINE VECTOR_CC vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } +static INLINE VECTOR_CC vfloat vposneg_vf_vf(vfloat d) { return (vfloat)vxor_vm_vm_vm((vmask)d, (vmask)PNMASKf); } +static INLINE VECTOR_CC vfloat vnegpos_vf_vf(vfloat d) { return (vfloat)vxor_vm_vm_vm((vmask)d, (vmask)NPMASKf); } + +static INLINE VECTOR_CC vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return vadd_vd_vd_vd(x, vnegpos_vd_vd(y)); } +static INLINE VECTOR_CC vfloat vsubadd_vf_vf_vf(vfloat d0, vfloat d1) { return vadd_vf_vf_vf(d0, vnegpos_vf_vf(d1)); } +static INLINE VECTOR_CC vdouble 
vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsubadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } +static INLINE VECTOR_CC vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsubadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } + +static INLINE VECTOR_CC vdouble vrev21_vd_vd(vdouble d0) { return (float64x2_t)vcombine_u64(vget_high_u64((uint64x2_t)d0), vget_low_u64((uint64x2_t)d0)); } +static INLINE VECTOR_CC vdouble vreva2_vd_vd(vdouble vd) { return vd; } + +static INLINE VECTOR_CC void vstream_v_p_vd(double *ptr, vdouble v) { vstore_v_p_vd(ptr, v); } +static INLINE VECTOR_CC void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vstore_v_p_vd((double *)(&ptr[2*offset]), v); } +static INLINE VECTOR_CC void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vstore_v_p_vd((double *)(&ptr[2*offset]), v); } + +static INLINE VECTOR_CC vfloat vrev21_vf_vf(vfloat d0) { return vrev64q_f32(d0); } +static INLINE VECTOR_CC vfloat vreva2_vf_vf(vfloat d0) { return vcombine_f32(vget_high_f32(d0), vget_low_f32(d0)); } +static INLINE VECTOR_CC vint2 vrev21_vi2_vi2(vint2 i) { return vreinterpret_vi2_vf(vrev21_vf_vf(vreinterpret_vf_vi2(i))); } + +static INLINE VECTOR_CC void vstream_v_p_vf(float *ptr, vfloat v) { vstore_v_p_vf(ptr, v); } + +static INLINE VECTOR_CC void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { + vst1_f32((float *)(ptr+(offset + step * 0)*2), vget_low_f32(v)); + vst1_f32((float *)(ptr+(offset + step * 1)*2), vget_high_f32(v)); +} + +static INLINE VECTOR_CC void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { + vst1_f32((float *)(ptr+(offset + step * 0)*2), vget_low_f32(v)); + vst1_f32((float *)(ptr+(offset + step * 1)*2), vget_high_f32(v)); +} + +// + +static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { + return (vmask2) { + vreinterpretq_u32_u64(vtrn1q_u64(vreinterpretq_u64_u32(v.x), vreinterpretq_u64_u32(v.y))), + vreinterpretq_u32_u64(vtrn2q_u64(vreinterpretq_u64_u32(v.x), vreinterpretq_u64_u32(v.y))) }; +} + +static INLINE vmask2 vuninterleave_vm2_vm2(vmask2 v) { + return (vmask2) { + vreinterpretq_u32_u64(vtrn1q_u64(vreinterpretq_u64_u32(v.x), vreinterpretq_u64_u32(v.y))), + vreinterpretq_u32_u64(vtrn2q_u64(vreinterpretq_u64_u32(v.x), vreinterpretq_u64_u32(v.y))) }; +} + +static INLINE vint vuninterleave_vi_vi(vint v) { return v; } +static INLINE vdouble vinterleave_vd_vd(vdouble vd) { return vd; } +static INLINE vdouble vuninterleave_vd_vd(vdouble vd) { return vd; } +static INLINE vmask vinterleave_vm_vm(vmask vm) { return vm; } +static INLINE vmask vuninterleave_vm_vm(vmask vm) { return vm; } + +static vmask2 vloadu_vm2_p(void *p) { + vmask2 vm2; + memcpy(&vm2, p, VECTLENDP * 16); + return vm2; +} + +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad2 vargquad; + +static INLINE vmask2 vcast_vm2_aq(vargquad aq) { + return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); +} + +static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { + vm2 = vuninterleave_vm2_vm2(vm2); + vargquad aq; + memcpy(&aq, &vm2, VECTLENDP * 16); + return aq; +} +#endif // #if !defined(SLEEF_GENHEADER) + +static INLINE int vtestallzeros_i_vo64(vopmask g) { + uint32x2_t x0 = vorr_u32(vget_low_u32(g), vget_high_u32(g)); + uint32x2_t x1 = vpmax_u32(x0, x0); + return ~vget_lane_u32(x1, 0); +} + +static INLINE vmask vsel_vm_vo64_vm_vm(vopmask m, vmask x, vmask y) { return vbslq_u32(m, x, y); } + +static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) { + return vreinterpretq_u32_s64(vsubq_s64(vreinterpretq_s64_u32(x), 
vreinterpretq_s64_u32(y)));
+}
+
+static INLINE vmask vneg64_vm_vm(vmask x) {
+  return vreinterpretq_u32_s64(vnegq_s64(vreinterpretq_s64_u32(x)));
+}
+
+static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) {
+  return vreinterpretq_u32_u64(vcgtq_s64(vreinterpretq_s64_u32(x), vreinterpretq_s64_u32(y)));
+}
+
+#define vsll64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x), c))
+//@#define vsll64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x), c))
+#define vsrl64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x), c))
+//@#define vsrl64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x), c))
+
+static INLINE vmask vcast_vm_vi(vint vi) {
+  vmask m = vreinterpretq_u32_u64(vmovl_u32(vreinterpret_u32_s32(vi)));
+  return vor_vm_vm_vm(vcast_vm_vi2(vcastu_vi2_vi(vreinterpret_s32_u32(vget_low_u32(vgt_vo_vi_vi(vcast_vi_i(0), vi))))), m);
+}
+static INLINE vint vcast_vi_vm(vmask vm) { return vreinterpret_s32_u32(vmovn_u64(vreinterpretq_u64_u32(vm))); }
diff --git a/src/helperavx.h b/src/helperavx.h
new file mode 100644
index 00000000..4c40a286
--- /dev/null
+++ b/src/helperavx.h
@@ -0,0 +1,695 @@
+// Copyright Naoki Shibata and contributors 2010 - 2020.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#if CONFIG == 1
+
+#if !defined(__AVX__) && !defined(SLEEF_GENHEADER)
+#error Please specify -mavx.
+#endif
+
+#elif CONFIG == 4
+
+#if (!defined(__AVX__) || !defined(__FMA4__)) && !defined(SLEEF_GENHEADER)
+#error Please specify -mavx and -mfma4.
+#endif
+
+#else
+#error CONFIG macro invalid or not defined
+#endif
+
+#define ENABLE_DP
+//@#define ENABLE_DP
+#define LOG2VECTLENDP 2
+//@#define LOG2VECTLENDP 2
+#define VECTLENDP (1 << LOG2VECTLENDP)
+//@#define VECTLENDP (1 << LOG2VECTLENDP)
+
+#define ENABLE_SP
+//@#define ENABLE_SP
+#define LOG2VECTLENSP (LOG2VECTLENDP+1)
+//@#define LOG2VECTLENSP (LOG2VECTLENDP+1)
+#define VECTLENSP (1 << LOG2VECTLENSP)
+//@#define VECTLENSP (1 << LOG2VECTLENSP)
+
+#define FULL_FP_ROUNDING
+//@#define FULL_FP_ROUNDING
+#define ACCURATE_SQRT
+//@#define ACCURATE_SQRT
+
+#if !defined(SLEEF_GENHEADER)
+#if defined(_MSC_VER)
+#include <intrin.h>
+#else
+#include <immintrin.h>
+#endif
+
+#include <stdint.h>
+#include "misc.h"
+#endif // #if !defined(SLEEF_GENHEADER)
+
+typedef __m256i vmask;
+typedef __m256i vopmask;
+
+typedef __m256d vdouble;
+typedef __m128i vint;
+
+typedef __m256 vfloat;
+typedef struct { __m128i x, y; } vint2;
+
+typedef struct {
+  vmask x, y;
+} vmask2;
+
+//
+
+#if !defined(SLEEF_GENHEADER)
+
+#ifndef __SLEEF_H__
+static inline void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx) {
+  /* We don't care about cpuid detection */
+  out[0] = 0xFFFFFFFF;
+  out[1] = 0xFFFFFFFF;
+  out[2] = 0xFFFFFFFF;
+  out[3] = 0xFFFFFFFF;
+}
+#endif
+
+static INLINE int cpuSupportsAVX() {
+  int32_t reg[4];
+  Sleef_x86CpuID(reg, 1, 0);
+  return (reg[2] & (1 << 28)) != 0;
+}
+
+static INLINE int cpuSupportsFMA4() {
+  int32_t reg[4];
+  Sleef_x86CpuID(reg, 0x80000001, 0);
+  return (reg[2] & (1 << 16)) != 0;
+}
+
+#if CONFIG == 4 && defined(__AVX__) && defined(__FMA4__)
+static INLINE int vavailability_i(int name) {
+  //int d = __builtin_cpu_supports("avx") && __builtin_cpu_supports("fma4");
+  int d = cpuSupportsAVX() && cpuSupportsFMA4();
+  return d ?
3 : 0; +} + +//typedef vint2 vint2_fma4; + +#define ENABLE_FMA_DP +#define ENABLE_FMA_SP + +#define ISANAME "AVX + AMD FMA4" +#define DFTPRIORITY 21 +#else +static INLINE int vavailability_i(int name) { + int d = cpuSupportsAVX(); + return d ? 3 : 0; +} +//typedef vint2 vint2_avx; + +#define ISANAME "AVX" +#define DFTPRIORITY 20 +#endif + +#endif // #if !defined(SLEEF_GENHEADER) + +static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } + +static INLINE int vtestallones_i_vo32(vopmask g) { + return _mm_test_all_ones(_mm_and_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))); +} + +static INLINE int vtestallones_i_vo64(vopmask g) { + return _mm_test_all_ones(_mm_and_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))); +} + +// + +static INLINE vdouble vcast_vd_d(double d) { return _mm256_set1_pd(d); } +static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return _mm256_castpd_si256(vd); } +static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return _mm256_castsi256_pd(vm); } +static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { + vint2 r; + r.x = _mm256_castsi256_si128(vreinterpret_vm_vd(vd)); + r.y = _mm256_extractf128_si256(vreinterpret_vm_vd(vd), 1); + return r; +} +static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) { + vmask m = _mm256_castsi128_si256(vi.x); + m = _mm256_insertf128_si256(m, vi.y, 1); + return vreinterpret_vd_vm(m); +} + +// + +static vint2 vloadu_vi2_p(int32_t *p) { + vint2 r; + r.x = _mm_loadu_si128((__m128i *) p ); + r.y = _mm_loadu_si128((__m128i *)(p + 4)); + return r; +} + +static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { + _mm_storeu_si128((__m128i *) p , v.x); + _mm_storeu_si128((__m128i *)(p + 4), v.y); +} + +static vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); } +static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); } + +// + +static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } + +static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } + +static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); 
} +static INLINE vmask vxor_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } + +static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } + +static INLINE vopmask vcast_vo32_vo64(vopmask o) { + return _mm256_castsi128_si256(_mm256_cvtpd_epi32(_mm256_and_pd(vreinterpret_vd_vm(o), _mm256_set1_pd(-1.0)))); +} + +static INLINE vopmask vcast_vo64_vo32(vopmask o) { + return vreinterpret_vm_vd(_mm256_cmp_pd(_mm256_cvtepi32_pd(_mm256_castsi256_si128(o)), _mm256_set1_pd(-1.0), _CMP_EQ_OQ)); +} + +// + +static INLINE vint vrint_vi_vd(vdouble vd) { return _mm256_cvtpd_epi32(vd); } +static INLINE vint vtruncate_vi_vd(vdouble vd) { return _mm256_cvttpd_epi32(vd); } +static INLINE vdouble vrint_vd_vd(vdouble vd) { return _mm256_round_pd(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } +static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return _mm256_round_pd(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } +static INLINE vfloat vrint_vf_vf(vfloat vd) { return _mm256_round_ps(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } +static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return _mm256_round_ps(vf, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } +static INLINE vdouble vcast_vd_vi(vint vi) { return _mm256_cvtepi32_pd(vi); } +static INLINE vint vcast_vi_i(int i) { return _mm_set1_epi32(i); } +static INLINE vint2 vcastu_vi2_vi(vint vi) { + vint2 r; + r.x = _mm_and_si128(_mm_shuffle_epi32(vi, 0x40), _mm_set_epi32(-1, 0, -1, 0)); + r.y = _mm_and_si128(_mm_shuffle_epi32(vi, 0xc8), _mm_set_epi32(-1, 0, -1, 0)); + return r; +} + +static INLINE vint vcastu_vi_vi2(vint2 vi) { + return _mm_or_si128(_mm_and_si128(_mm_shuffle_epi32(vi.x, 0x0d), _mm_set_epi32( 0, 0, -1, -1)), + _mm_and_si128(_mm_shuffle_epi32(vi.y, 0xd0), _mm_set_epi32(-1, -1, 0, 0))); +} + +static INLINE vmask vcast_vm_i_i(int i0, int i1) { + return _mm256_set_epi32(i0, i1, i0, i1, i0, i1, i0, i1); +} + +static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { + return vreinterpret_vm_vd(_mm256_cmp_pd(vreinterpret_vd_vm(vxor_vm_vm_vm(vxor_vm_vm_vm(x, y), vreinterpret_vm_vd(_mm256_set1_pd(1.0)))), _mm256_set1_pd(1.0), _CMP_EQ_OQ)); +} + +// + +static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm256_add_pd(x, y); } +static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return _mm256_sub_pd(x, y); } +static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm256_mul_pd(x, y); } +static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return _mm256_div_pd(x, y); } +static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm256_div_pd(_mm256_set1_pd(1), x); } +static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm256_sqrt_pd(x); } +static INLINE vdouble vabs_vd_vd(vdouble d) { return _mm256_andnot_pd(_mm256_set1_pd(-0.0), d); } +static INLINE vdouble vneg_vd_vd(vdouble d) { return _mm256_xor_pd(_mm256_set1_pd(-0.0), d); } +static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm256_max_pd(x, y); } 
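+// Editor's note (assumption drawn from the CONFIG branches in this file):
+// in the CONFIG == 1 (plain AVX) branch below, vmla and its variants are
+// emulated as a separate multiply and add, so the product is rounded
+// twice; in the CONFIG == 4 branch they map to single-rounding AMD FMA4
+// intrinsics such as _mm256_macc_pd, which is why ENABLE_FMA_DP and
+// ENABLE_FMA_SP are only defined in the FMA4 configuration above.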
+static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm256_min_pd(x, y); } + +#if CONFIG == 1 +static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } +static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } +static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(z, vmul_vd_vd_vd(x, y)); } +#else +static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_macc_pd(x, y, z); } +static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_msub_pd(x, y, z); } +static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_nmacc_pd(x, y, z); } +static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_macc_pd(x, y, z); } +static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_macc_pd(x, y, z); } +static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_msub_pd(x, y, z); } +static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_nmacc_pd(x, y, z); } +static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_nmsub_pd(x, y, z); } +#endif + +static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_EQ_OQ)); } +static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_NEQ_UQ)); } +static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_LT_OQ)); } +static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_LE_OQ)); } +static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_GT_OQ)); } +static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_GE_OQ)); } + +// + +static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm_add_epi32(x, y); } +static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm_sub_epi32(x, y); } +static INLINE vint vneg_vi_vi(vint e) { return vsub_vi_vi_vi(vcast_vi_i(0), e); } + +static INLINE vint vand_vi_vi_vi(vint x, vint y) { return _mm_and_si128(x, y); } +static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm_andnot_si128(x, y); } +static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm_or_si128(x, y); } +static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm_xor_si128(x, y); } + +static INLINE vint vandnot_vi_vo_vi(vopmask m, vint y) { return _mm_andnot_si128(_mm256_castsi256_si128(m), y); } +static INLINE vint vand_vi_vo_vi(vopmask m, vint y) { return _mm_and_si128(_mm256_castsi256_si128(m), y); } + +static INLINE vint vsll_vi_vi_i(vint x, int c) { return _mm_slli_epi32(x, c); } +static INLINE vint vsrl_vi_vi_i(vint x, int c) { return _mm_srli_epi32(x, c); } +static INLINE vint vsra_vi_vi_i(vint x, int c) { return _mm_srai_epi32(x, c); } + +static INLINE vint veq_vi_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); } +static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); } + +static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return _mm256_castsi128_si256(_mm_cmpeq_epi32(x, y)); } +static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return 
_mm256_castsi128_si256(_mm_cmpgt_epi32(x, y)); } + +static INLINE vint vsel_vi_vo_vi_vi(vopmask o, vint x, vint y) { return _mm_blendv_epi8(y, x, _mm256_castsi256_si128(o)); } + +static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, vdouble x, vdouble y) { return _mm256_blendv_pd(y, x, _mm256_castsi256_pd(o)); } + +static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { + return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0)); +} + +static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { + return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2)); +} + +static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { + return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3))); +} + +static INLINE vopmask visinf_vo_vd(vdouble d) { + return vreinterpret_vm_vd(_mm256_cmp_pd(vabs_vd_vd(d), _mm256_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ)); +} + +static INLINE vopmask vispinf_vo_vd(vdouble d) { + return vreinterpret_vm_vd(_mm256_cmp_pd(d, _mm256_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ)); +} + +static INLINE vopmask visminf_vo_vd(vdouble d) { + return vreinterpret_vm_vd(_mm256_cmp_pd(d, _mm256_set1_pd(-SLEEF_INFINITY), _CMP_EQ_OQ)); +} + +static INLINE vopmask visnan_vo_vd(vdouble d) { + return vreinterpret_vm_vd(_mm256_cmp_pd(d, d, _CMP_NEQ_UQ)); +} + +static INLINE vdouble vload_vd_p(const double *ptr) { return _mm256_load_pd(ptr); } +static INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm256_loadu_pd(ptr); } + +static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { _mm256_store_pd(ptr, v); } +static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { _mm256_storeu_pd(ptr, v); } + +static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { + int a[VECTLENDP]; + vstoreu_v_p_vi(a, vi); + return _mm256_set_pd(ptr[a[3]], ptr[a[2]], ptr[a[1]], ptr[a[0]]); +} + +#if defined(_MSC_VER) +// This function is needed when debugging on MSVC. 
+static INLINE double vcast_d_vd(vdouble v) { + double a[VECTLENDP]; + vstoreu_v_p_vd(a, v); + return a[0]; +} +#endif + +// + +static INLINE vint2 vcast_vi2_vm(vmask vm) { + vint2 r; + r.x = _mm256_castsi256_si128(vm); + r.y = _mm256_extractf128_si256(vm, 1); + return r; +} + +static INLINE vmask vcast_vm_vi2(vint2 vi) { + vmask m = _mm256_castsi128_si256(vi.x); + m = _mm256_insertf128_si256(m, vi.y, 1); + return m; +} + +static INLINE vint2 vrint_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm256_cvtps_epi32(vf)); } +static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm256_cvttps_epi32(vf)); } +static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm256_cvtepi32_ps(vcast_vm_vi2(vi)); } +static INLINE vfloat vcast_vf_f(float f) { return _mm256_set1_ps(f); } +static INLINE vint2 vcast_vi2_i(int i) { vint2 r; r.x = r.y = _mm_set1_epi32(i); return r; } +static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return _mm256_castps_si256(vf); } +static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return _mm256_castsi256_ps(vm); } + +static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { return vreinterpret_vf_vm(vcast_vm_vi2(vi)); } +static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return vcast_vi2_vm(vreinterpret_vm_vf(vf)); } + +static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return _mm256_add_ps(x, y); } +static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm256_sub_ps(x, y); } +static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm256_mul_ps(x, y); } +static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm256_div_ps(x, y); } +static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); } +static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm256_sqrt_ps(x); } +static INLINE vfloat vabs_vf_vf(vfloat f) { return vreinterpret_vf_vm(vandnot_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))); } +static INLINE vfloat vneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(d))); } +static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm256_max_ps(x, y); } +static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm256_min_ps(x, y); } + +#if CONFIG == 1 +static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); } +static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } +#else +static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_macc_ps(x, y, z); } +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_nmacc_ps(x, y, z); } +static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_msub_ps(x, y, z); } +static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_macc_ps(x, y, z); } +static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_macc_ps(x, y, z); } +static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_msub_ps(x, y, z); } +static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_nmacc_ps(x, y, z); } +static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_nmsub_ps(x, y, z); } +#endif + +static INLINE vopmask veq_vo_vf_vf(vfloat x, 
vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_EQ_OQ)); } +static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_NEQ_UQ)); } +static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_LT_OQ)); } +static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_LE_OQ)); } +static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_GT_OQ)); } +static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_GE_OQ)); } + +static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { + vint2 vi = { _mm_add_epi32(x.x, y.x), _mm_add_epi32(x.y, y.y) }; + return vi; +} + +static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { + vint2 vi = { _mm_sub_epi32(x.x, y.x), _mm_sub_epi32(x.y, y.y) }; + return vi; +} + +static INLINE vint2 vneg_vi2_vi2(vint2 e) { + vint2 vi = { _mm_sub_epi32(_mm_set1_epi32(0), e.x), _mm_sub_epi32(_mm_set1_epi32(0), e.y) }; + return vi; +} + +static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { + vint2 vi = { _mm_and_si128(x.x, y.x), _mm_and_si128(x.y, y.y) }; + return vi; +} + +static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { + vint2 vi = { _mm_andnot_si128(x.x, y.x), _mm_andnot_si128(x.y, y.y) }; + return vi; +} + +static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { + vint2 vi = { _mm_or_si128(x.x, y.x), _mm_or_si128(x.y, y.y) }; + return vi; +} + +static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { + vint2 vi = { _mm_xor_si128(x.x, y.x), _mm_xor_si128(x.y, y.y) }; + return vi; +} + +static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return vand_vi2_vi2_vi2(vcast_vi2_vm(x), y); } +static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return vandnot_vi2_vi2_vi2(vcast_vi2_vm(x), y); } + +static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { + vint2 vi = { _mm_slli_epi32(x.x, c), _mm_slli_epi32(x.y, c) }; + return vi; +} + +static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { + vint2 vi = { _mm_srli_epi32(x.x, c), _mm_srli_epi32(x.y, c) }; + return vi; +} + +static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { + vint2 vi = { _mm_srai_epi32(x.x, c), _mm_srai_epi32(x.y, c) }; + return vi; +} + +static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { + vint2 r; + r.x = _mm_cmpeq_epi32(x.x, y.x); + r.y = _mm_cmpeq_epi32(x.y, y.y); + return vcast_vm_vi2(r); +} + +static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { + vint2 r; + r.x = _mm_cmpgt_epi32(x.x, y.x); + r.y = _mm_cmpgt_epi32(x.y, y.y); + return vcast_vm_vi2(r); +} + +static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { + vint2 r; + r.x = _mm_cmpeq_epi32(x.x, y.x); + r.y = _mm_cmpeq_epi32(x.y, y.y); + return r; +} + +static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { + vint2 r; + r.x = _mm_cmpgt_epi32(x.x, y.x); + r.y = _mm_cmpgt_epi32(x.y, y.y); + return r; +} + +static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { + vint2 n = vcast_vi2_vm(m); + vint2 r = { _mm_blendv_epi8(y.x, x.x, n.x), _mm_blendv_epi8(y.y, x.y, n.y) }; + return r; +} + +static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { + vint2 ix = vcast_vi2_vm(x), iy = vcast_vi2_vm(y), iz; + iz.x = _mm_add_epi64(ix.x, iy.x); + iz.y = _mm_add_epi64(ix.y, iy.y); + return vcast_vm_vi2(iz); +} + +static INLINE vfloat vsel_vf_vo_vf_vf(vopmask o, vfloat x, vfloat y) { return _mm256_blendv_ps(y, x, _mm256_castsi256_ps(o)); } + +static INLINE CONST vfloat 
vsel_vf_vo_f_f(vopmask o, float v1, float v0) { + return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); +} + +static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { + return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); +} + +static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { + return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); +} + +static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); } +static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); } +static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); } +static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } + +// + +static INLINE vfloat vload_vf_p(const float *ptr) { return _mm256_load_ps(ptr); } +static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm256_loadu_ps(ptr); } + +static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { _mm256_store_ps(ptr, v); } +static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { _mm256_storeu_ps(ptr, v); } + +static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { + int a[VECTLENSP]; + vstoreu_v_p_vi2(a, vi2); + return _mm256_set_ps(ptr[a[7]], ptr[a[6]], ptr[a[5]], ptr[a[4]], + ptr[a[3]], ptr[a[2]], ptr[a[1]], ptr[a[0]]); +} + +#ifdef _MSC_VER +// This function is needed when debugging on MSVC. +static INLINE float vcast_f_vf(vfloat v) { + float a[VECTLENSP]; + vstoreu_v_p_vf(a, v); + return a[0]; +} +#endif +// + +#define PNMASK ((vdouble) { +0.0, -0.0, +0.0, -0.0 }) +#define NPMASK ((vdouble) { -0.0, +0.0, -0.0, +0.0 }) +#define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f }) +#define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f }) + +static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } +static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } +static INLINE vfloat vposneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(PNMASKf))); } +static INLINE vfloat vnegpos_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(NPMASKf))); } + +static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return _mm256_addsub_pd(x, y); } +static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return _mm256_addsub_ps(x, y); } + +#if CONFIG == 1 +static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsubadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } +static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsubadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } +#else +static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vmla_vd_vd_vd_vd(x, y, vnegpos_vd_vd(z)); } +static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmla_vf_vf_vf_vf(x, y, vnegpos_vf_vf(z)); } +#endif + + +static INLINE vdouble vrev21_vd_vd(vdouble d0) { return _mm256_shuffle_pd(d0, d0, (0 << 3) | (1 << 2) | (0 << 1) | (1 << 0)); } +static INLINE vdouble vreva2_vd_vd(vdouble d0) { d0 = _mm256_permute2f128_pd(d0, d0, 1); return 
_mm256_shuffle_pd(d0, d0, (1 << 3) | (0 << 2) | (1 << 1) | (0 << 0)); } + +static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { _mm256_stream_pd(ptr, v); } +static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { + _mm_store_pd(&ptr[(offset + step * 0)*2], _mm256_extractf128_pd(v, 0)); + _mm_store_pd(&ptr[(offset + step * 1)*2], _mm256_extractf128_pd(v, 1)); +} + +static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { + _mm_stream_pd(&ptr[(offset + step * 0)*2], _mm256_extractf128_pd(v, 0)); + _mm_stream_pd(&ptr[(offset + step * 1)*2], _mm256_extractf128_pd(v, 1)); +} + +// + +static INLINE vfloat vrev21_vf_vf(vfloat d0) { return _mm256_shuffle_ps(d0, d0, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); } +static INLINE vfloat vreva2_vf_vf(vfloat d0) { d0 = _mm256_permute2f128_ps(d0, d0, 1); return _mm256_shuffle_ps(d0, d0, (1 << 6) | (0 << 4) | (3 << 2) | (2 << 0)); } +static INLINE vint2 vrev21_vi2_vi2(vint2 i) { return vreinterpret_vi2_vf(vrev21_vf_vf(vreinterpret_vf_vi2(i))); } + +static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { _mm256_stream_ps(ptr, v); } + +static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { + _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 0)))); + _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 0)))); + _mm_storel_pd((double *)(ptr+(offset + step * 2)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 1)))); + _mm_storeh_pd((double *)(ptr+(offset + step * 3)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 1)))); +} + +static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); } + +// + +static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { + return (vmask2) { + vreinterpret_vm_vd(_mm256_unpacklo_pd(vreinterpret_vd_vm(v.x), vreinterpret_vd_vm(v.y))), + vreinterpret_vm_vd(_mm256_unpackhi_pd(vreinterpret_vd_vm(v.x), vreinterpret_vd_vm(v.y))) }; +} + +static INLINE vmask2 vuninterleave_vm2_vm2(vmask2 v) { + return (vmask2) { + vreinterpret_vm_vd(_mm256_unpacklo_pd(vreinterpret_vd_vm(v.x), vreinterpret_vd_vm(v.y))), + vreinterpret_vm_vd(_mm256_unpackhi_pd(vreinterpret_vd_vm(v.x), vreinterpret_vd_vm(v.y))) }; +} + +static INLINE vint vuninterleave_vi_vi(vint v) { + return _mm_shuffle_epi32(v, (0 << 0) | (2 << 2) | (1 << 4) | (3 << 6)); +} + +static INLINE vdouble vinterleave_vd_vd(vdouble vd) { + double tmp[4]; + vstoreu_v_p_vd(tmp, vd); + double t = tmp[1]; tmp[1] = tmp[2]; tmp[2] = t; + return vloadu_vd_p(tmp); +} + +static INLINE vdouble vuninterleave_vd_vd(vdouble vd) { + double tmp[4]; + vstoreu_v_p_vd(tmp, vd); + double t = tmp[1]; tmp[1] = tmp[2]; tmp[2] = t; + return vloadu_vd_p(tmp); +} + +static INLINE vmask vinterleave_vm_vm(vmask vm) { + double tmp[4]; + vstoreu_v_p_vd(tmp, vreinterpret_vd_vm(vm)); + double t = tmp[1]; tmp[1] = tmp[2]; tmp[2] = t; + return vreinterpret_vm_vd(vloadu_vd_p(tmp)); +} + +static INLINE vmask vuninterleave_vm_vm(vmask vm) { + double tmp[4]; + vstoreu_v_p_vd(tmp, vreinterpret_vd_vm(vm)); + double t = tmp[1]; tmp[1] = tmp[2]; tmp[2] = t; + return vreinterpret_vm_vd(vloadu_vd_p(tmp)); +} + +static vmask2 vloadu_vm2_p(void *p) { + vmask2 vm2; + memcpy(&vm2, p, VECTLENDP * 16); + return vm2; +} + +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad4 vargquad; + +static INLINE vmask2 vcast_vm2_aq(vargquad aq) 
{ + return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); +} + +static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { + vm2 = vuninterleave_vm2_vm2(vm2); + vargquad aq; + memcpy(&aq, &vm2, VECTLENDP * 16); + return aq; +} +#endif // #if !defined(SLEEF_GENHEADER) + +static INLINE int vtestallzeros_i_vo64(vopmask g) { + return _mm_movemask_epi8(_mm_or_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))) == 0; +} + +static INLINE vmask vsel_vm_vo64_vm_vm(vopmask o, vmask x, vmask y) { + return vreinterpret_vm_vd(_mm256_blendv_pd(vreinterpret_vd_vm(y), vreinterpret_vd_vm(x), vreinterpret_vd_vm(o))); +} + +static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) { + __m128i xh = _mm256_extractf128_si256(x, 1), xl = _mm256_extractf128_si256(x, 0); + __m128i yh = _mm256_extractf128_si256(y, 1), yl = _mm256_extractf128_si256(y, 0); + vmask r = _mm256_castsi128_si256(_mm_sub_epi64(xl, yl)); + return _mm256_insertf128_si256(r, _mm_sub_epi64(xh, yh), 1); +} + +static INLINE vmask vneg64_vm_vm(vmask x) { return vsub64_vm_vm_vm(vcast_vm_i_i(0, 0), x); } +static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { + __m128i xh = _mm256_extractf128_si256(x, 1), xl = _mm256_extractf128_si256(x, 0); + __m128i yh = _mm256_extractf128_si256(y, 1), yl = _mm256_extractf128_si256(y, 0); + vmask r = _mm256_castsi128_si256(_mm_cmpgt_epi64(xl, yl)); + return _mm256_insertf128_si256(r, _mm_cmpgt_epi64(xh, yh), 1); +} + +#define vsll64_vm_vm_i(x, c) \ + _mm256_insertf128_si256(_mm256_castsi128_si256(_mm_slli_epi64(_mm256_extractf128_si256(x, 0), c)), \ + _mm_slli_epi64(_mm256_extractf128_si256(x, 1), c), 1) +#define vsrl64_vm_vm_i(x, c) \ + _mm256_insertf128_si256(_mm256_castsi128_si256(_mm_srli_epi64(_mm256_extractf128_si256(x, 0), c)), \ + _mm_srli_epi64(_mm256_extractf128_si256(x, 1), c), 1) + +//@#define vsll64_vm_vm_i(x, c) _mm256_insertf128_si256(_mm256_castsi128_si256(_mm_slli_epi64(_mm256_extractf128_si256(x, 0), c)), _mm_slli_epi64(_mm256_extractf128_si256(x, 1), c), 1) +//@#define vsrl64_vm_vm_i(x, c) _mm256_insertf128_si256(_mm256_castsi128_si256(_mm_srli_epi64(_mm256_extractf128_si256(x, 0), c)), _mm_srli_epi64(_mm256_extractf128_si256(x, 1), c), 1) + +static INLINE vmask vcast_vm_vi(vint vi) { + vint vi0 = _mm_and_si128(_mm_shuffle_epi32(vi, (1 << 4) | (1 << 6)), _mm_set_epi32(0, -1, 0, -1)); + vint vi1 = _mm_and_si128(_mm_shuffle_epi32(vi, (2 << 0) | (2 << 2) | (3 << 4) | (3 << 6)), _mm_set_epi32(0, -1, 0, -1)); + vmask m = _mm256_insertf128_si256(_mm256_castsi128_si256(vi0), vi1, 1); + return vor_vm_vm_vm(vcast_vm_vi2(vcastu_vi2_vi(vand_vi_vo_vi(vgt_vo_vi_vi(vcast_vi_i(0), vi), vcast_vi_i(-1)))), m); +} +static INLINE vint vcast_vi_vm(vmask vm) { + return _mm_or_si128(_mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm256_castsi256_si128(vm)), _mm_set1_ps(0), 0x08)), + _mm_castps_si128(_mm_shuffle_ps(_mm_set1_ps(0), _mm_castsi128_ps(_mm256_extractf128_si256(vm, 1)), 0x80))); +} diff --git a/src/helperavx2.h b/src/helperavx2.h new file mode 100644 index 00000000..29fe8707 --- /dev/null +++ b/src/helperavx2.h @@ -0,0 +1,512 @@ +// Copyright Naoki Shibata and contributors 2010 - 2020. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#if CONFIG == 1 + +#if !defined(__AVX2__) && !defined(SLEEF_GENHEADER) +#error Please specify -mavx2. 
+#endif
+
+#else
+#error CONFIG macro invalid or not defined
+#endif
+
+#define ENABLE_DP
+//@#define ENABLE_DP
+#define LOG2VECTLENDP 2
+//@#define LOG2VECTLENDP 2
+#define VECTLENDP (1 << LOG2VECTLENDP)
+//@#define VECTLENDP (1 << LOG2VECTLENDP)
+#define ENABLE_FMA_DP
+//@#define ENABLE_FMA_DP
+
+#define ENABLE_SP
+//@#define ENABLE_SP
+#define LOG2VECTLENSP (LOG2VECTLENDP+1)
+//@#define LOG2VECTLENSP (LOG2VECTLENDP+1)
+#define VECTLENSP (1 << LOG2VECTLENSP)
+//@#define VECTLENSP (1 << LOG2VECTLENSP)
+#define ENABLE_FMA_SP
+//@#define ENABLE_FMA_SP
+
+#define FULL_FP_ROUNDING
+//@#define FULL_FP_ROUNDING
+#define ACCURATE_SQRT
+//@#define ACCURATE_SQRT
+
+#if !defined(SLEEF_GENHEADER)
+#if defined(_MSC_VER)
+#include <intrin.h>
+#else
+#include <immintrin.h>
+#endif
+
+#include <stdint.h>
+#include "misc.h"
+#endif // #if !defined(SLEEF_GENHEADER)
+
+typedef __m256i vmask;
+typedef __m256i vopmask;
+
+typedef __m256d vdouble;
+typedef __m128i vint;
+
+typedef __m256 vfloat;
+typedef __m256i vint2;
+
+typedef struct {
+  vmask x, y;
+} vmask2;
+
+//
+
+#if !defined(SLEEF_GENHEADER)
+
+#ifndef __SLEEF_H__
+static inline void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx) {
+  /* We don't care about cpuid detection */
+  out[0] = 0xFFFFFFFF;
+  out[1] = 0xFFFFFFFF;
+  out[2] = 0xFFFFFFFF;
+  out[3] = 0xFFFFFFFF;
+}
+#endif
+
+static INLINE int cpuSupportsAVX2() {
+  int32_t reg[4];
+  Sleef_x86CpuID(reg, 7, 0);
+  return (reg[1] & (1 << 5)) != 0;
+}
+
+static INLINE int cpuSupportsFMA() {
+  int32_t reg[4];
+  Sleef_x86CpuID(reg, 1, 0);
+  return (reg[2] & (1 << 12)) != 0;
+}
+
+#if CONFIG == 1 && defined(__AVX2__)
+static INLINE int vavailability_i(int name) {
+  int d = cpuSupportsAVX2() && cpuSupportsFMA();
+  return d ? 3 : 0;
+}
+#define ISANAME "AVX2"
+#define DFTPRIORITY 25
+#endif
+
+#endif // #if !defined(SLEEF_GENHEADER)
+
+static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); }
+
+static INLINE int vtestallones_i_vo32(vopmask g) {
+  return _mm_test_all_ones(_mm_and_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1)));
+}
+
+static INLINE int vtestallones_i_vo64(vopmask g) {
+  return _mm_test_all_ones(_mm_and_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1)));
+}
+
+//
+
+static INLINE vdouble vcast_vd_d(double d) { return _mm256_set1_pd(d); }
+static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return _mm256_castpd_si256(vd); }
+static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return _mm256_castsi256_pd(vm); }
+static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { return _mm256_castpd_si256(vd); }
+static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) { return _mm256_castsi256_pd(vi); }
+
+//
+
+static vint2 vloadu_vi2_p(int32_t *p) { return _mm256_loadu_si256((__m256i const *)p); }
+static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm256_storeu_si256((__m256i *)p, v); }
+static vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); }
+static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); }
+
+//
+
+static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
+static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
+static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
+static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return
vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } + +static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } + +static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vmask vxor_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } + +static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } +static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } + +static INLINE vopmask vcast_vo32_vo64(vopmask o) { + return _mm256_permutevar8x32_epi32(o, _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0)); +} + +static INLINE vopmask vcast_vo64_vo32(vopmask o) { + return _mm256_permutevar8x32_epi32(o, _mm256_set_epi32(3, 3, 2, 2, 1, 1, 0, 0)); +} + +// + +static INLINE vint vrint_vi_vd(vdouble vd) { return _mm256_cvtpd_epi32(vd); } +static INLINE vint vtruncate_vi_vd(vdouble vd) { return _mm256_cvttpd_epi32(vd); } +static INLINE vdouble vrint_vd_vd(vdouble vd) { return _mm256_round_pd(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } +static INLINE vfloat vrint_vf_vf(vfloat vd) { return _mm256_round_ps(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } +static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return _mm256_round_pd(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } +static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return _mm256_round_ps(vf, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } +static INLINE vdouble vcast_vd_vi(vint vi) { return _mm256_cvtepi32_pd(vi); } +static INLINE vint vcast_vi_i(int i) { return _mm_set1_epi32(i); } + +static INLINE vint2 vcastu_vi2_vi(vint vi) { + return _mm256_slli_epi64(_mm256_cvtepi32_epi64(vi), 32); +} + +static INLINE vint vcastu_vi_vi2(vint2 vi) { + return _mm_or_si128(_mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm256_castsi256_si128(vi)), _mm_set1_ps(0), 0x0d)), + _mm_castps_si128(_mm_shuffle_ps(_mm_set1_ps(0), _mm_castsi128_ps(_mm256_extractf128_si256(vi, 1)), 0xd0))); +} + +static INLINE vmask vcast_vm_i_i(int i0, int i1) { + return _mm256_set_epi32(i0, i1, i0, i1, i0, i1, i0, i1); +} + +static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) 
{ return _mm256_cmpeq_epi64(x, y); } +static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return _mm256_add_epi64(x, y); } + +// + +static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm256_add_pd(x, y); } +static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return _mm256_sub_pd(x, y); } +static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm256_mul_pd(x, y); } +static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return _mm256_div_pd(x, y); } +static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm256_div_pd(_mm256_set1_pd(1), x); } +static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm256_sqrt_pd(x); } +static INLINE vdouble vabs_vd_vd(vdouble d) { return _mm256_andnot_pd(_mm256_set1_pd(-0.0), d); } +static INLINE vdouble vneg_vd_vd(vdouble d) { return _mm256_xor_pd(_mm256_set1_pd(-0.0), d); } +static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmadd_pd(x, y, z); } +static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmsub_pd(x, y, z); } +static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fnmadd_pd(x, y, z); } +static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm256_max_pd(x, y); } +static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm256_min_pd(x, y); } + +static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmadd_pd(x, y, z); } +static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmadd_pd(x, y, z); } +static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmsub_pd(x, y, z); } +static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fnmadd_pd(x, y, z); } +static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fnmsub_pd(x, y, z); } + +static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_EQ_OQ)); } +static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_NEQ_UQ)); } +static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_LT_OQ)); } +static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_LE_OQ)); } +static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_GT_OQ)); } +static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_GE_OQ)); } + +// + +static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm_add_epi32(x, y); } +static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm_sub_epi32(x, y); } +static INLINE vint vneg_vi_vi(vint e) { return vsub_vi_vi_vi(vcast_vi_i(0), e); } + +static INLINE vint vand_vi_vi_vi(vint x, vint y) { return _mm_and_si128(x, y); } +static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm_andnot_si128(x, y); } +static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm_or_si128(x, y); } +static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm_xor_si128(x, y); } + +static INLINE vint vandnot_vi_vo_vi(vopmask m, vint y) { return _mm_andnot_si128(_mm256_castsi256_si128(m), y); } +static INLINE vint vand_vi_vo_vi(vopmask m, vint y) { return _mm_and_si128(_mm256_castsi256_si128(m), y); } + +static INLINE vint vsll_vi_vi_i(vint x, int c) { return 
_mm_slli_epi32(x, c); } +static INLINE vint vsrl_vi_vi_i(vint x, int c) { return _mm_srli_epi32(x, c); } +static INLINE vint vsra_vi_vi_i(vint x, int c) { return _mm_srai_epi32(x, c); } + +static INLINE vint veq_vi_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); } +static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); } + +static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return _mm256_castsi128_si256(_mm_cmpeq_epi32(x, y)); } +static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return _mm256_castsi128_si256(_mm_cmpgt_epi32(x, y)); } + +static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return _mm_blendv_epi8(y, x, _mm256_castsi256_si128(m)); } + +static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, vdouble x, vdouble y) { return _mm256_blendv_pd(y, x, _mm256_castsi256_pd(o)); } +static INLINE vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { return _mm256_permutevar_pd(_mm256_set_pd(v1, v0, v1, v0), o); } + +static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { + __m256i v = _mm256_castpd_si256(vsel_vd_vo_vd_vd(o0, _mm256_castsi256_pd(_mm256_set_epi32(1, 0, 1, 0, 1, 0, 1, 0)), + vsel_vd_vo_vd_vd(o1, _mm256_castsi256_pd(_mm256_set_epi32(3, 2, 3, 2, 3, 2, 3, 2)), + vsel_vd_vo_vd_vd(o2, _mm256_castsi256_pd(_mm256_set_epi32(5, 4, 5, 4, 5, 4, 5, 4)), + _mm256_castsi256_pd(_mm256_set_epi32(7, 6, 7, 6, 7, 6, 7, 6)))))); + return _mm256_castsi256_pd(_mm256_permutevar8x32_epi32(_mm256_castpd_si256(_mm256_set_pd(d3, d2, d1, d0)), v)); +} + +static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { + return vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o1, d0, d1, d2, d2); +} + +static INLINE vopmask visinf_vo_vd(vdouble d) { + return vreinterpret_vm_vd(_mm256_cmp_pd(vabs_vd_vd(d), _mm256_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ)); +} + +static INLINE vopmask vispinf_vo_vd(vdouble d) { + return vreinterpret_vm_vd(_mm256_cmp_pd(d, _mm256_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ)); +} + +static INLINE vopmask visminf_vo_vd(vdouble d) { + return vreinterpret_vm_vd(_mm256_cmp_pd(d, _mm256_set1_pd(-SLEEF_INFINITY), _CMP_EQ_OQ)); +} + +static INLINE vopmask visnan_vo_vd(vdouble d) { + return vreinterpret_vm_vd(_mm256_cmp_pd(d, d, _CMP_NEQ_UQ)); +} + +#if defined(_MSC_VER) +// This function is needed when debugging on MSVC. 
+static INLINE double vcast_d_vd(vdouble v) { + double s[4]; + _mm256_storeu_pd(s, v); + return s[0]; +} +#endif + +static INLINE vdouble vload_vd_p(const double *ptr) { return _mm256_load_pd(ptr); } +static INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm256_loadu_pd(ptr); } + +static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { _mm256_store_pd(ptr, v); } +static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { _mm256_storeu_pd(ptr, v); } + +static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { return _mm256_i32gather_pd(ptr, vi, 8); } + +// + +static INLINE vint2 vcast_vi2_vm(vmask vm) { return vm; } +static INLINE vmask vcast_vm_vi2(vint2 vi) { return vi; } + +static INLINE vint2 vrint_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm256_cvtps_epi32(vf)); } +static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm256_cvttps_epi32(vf)); } +static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm256_cvtepi32_ps(vcast_vm_vi2(vi)); } +static INLINE vfloat vcast_vf_f(float f) { return _mm256_set1_ps(f); } +static INLINE vint2 vcast_vi2_i(int i) { return _mm256_set1_epi32(i); } +static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return _mm256_castps_si256(vf); } +static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return _mm256_castsi256_ps(vm); } + +static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { return vreinterpret_vf_vm(vcast_vm_vi2(vi)); } +static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return vcast_vi2_vm(vreinterpret_vm_vf(vf)); } + +static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return _mm256_add_ps(x, y); } +static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm256_sub_ps(x, y); } +static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm256_mul_ps(x, y); } +static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm256_div_ps(x, y); } +static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); } +static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm256_sqrt_ps(x); } +static INLINE vfloat vabs_vf_vf(vfloat f) { return vreinterpret_vf_vm(vandnot_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))); } +static INLINE vfloat vneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(d))); } +static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmadd_ps(x, y, z); } +static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmsub_ps(x, y, z); } +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fnmadd_ps(x, y, z); } +static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm256_max_ps(x, y); } +static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm256_min_ps(x, y); } + +static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmadd_ps(x, y, z); } +static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmadd_ps(x, y, z); } +static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmsub_ps(x, y, z); } +static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fnmadd_ps(x, y, z); } +static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fnmsub_ps(x, y, z); } + +static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_EQ_OQ)); } +static INLINE vopmask vneq_vo_vf_vf(vfloat x, 
vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_NEQ_UQ)); } +static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_LT_OQ)); } +static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_LE_OQ)); } +static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_GT_OQ)); } +static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_GE_OQ)); } + +static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_add_epi32(x, y); } +static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_sub_epi32(x, y); } +static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vsub_vi2_vi2_vi2(vcast_vi2_i(0), e); } + +static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_and_si256(x, y); } +static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_andnot_si256(x, y); } +static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_or_si256(x, y); } +static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_xor_si256(x, y); } + +static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return vand_vi2_vi2_vi2(vcast_vi2_vm(x), y); } +static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return vandnot_vi2_vi2_vi2(vcast_vi2_vm(x), y); } + +static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { return _mm256_slli_epi32(x, c); } +static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { return _mm256_srli_epi32(x, c); } +static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { return _mm256_srai_epi32(x, c); } + +static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return _mm256_cmpeq_epi32(x, y); } +static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return _mm256_cmpgt_epi32(x, y); } +static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_cmpeq_epi32(x, y); } +static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_cmpgt_epi32(x, y); } + +static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { + return _mm256_blendv_epi8(y, x, m); +} + +static INLINE vfloat vsel_vf_vo_vf_vf(vopmask o, vfloat x, vfloat y) { return _mm256_blendv_ps(y, x, _mm256_castsi256_ps(o)); } + +// At this point, the following three functions are implemented in a generic way, +// but I will try target-specific optimization later on. +static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { + return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); +} + +static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { + return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); +} + +static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { + return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); +} + +static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); } +static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); } +static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); } +static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } + +#ifdef _MSC_VER +// This function is needed when debugging on MSVC. 
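+// Same pattern as vcast_d_vd above: all eight f32 lanes are written to a
+// stack buffer so they can be inspected, and only lane 0 is returned.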
+static INLINE float vcast_f_vf(vfloat v) { + float s[8]; + _mm256_storeu_ps(s, v); + return s[0]; +} +#endif + +static INLINE vfloat vload_vf_p(const float *ptr) { return _mm256_load_ps(ptr); } +static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm256_loadu_ps(ptr); } + +static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { _mm256_store_ps(ptr, v); } +static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { _mm256_storeu_ps(ptr, v); } + +static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { return _mm256_i32gather_ps(ptr, vi2, 4); } + +// + +#define PNMASK ((vdouble) { +0.0, -0.0, +0.0, -0.0 }) +#define NPMASK ((vdouble) { -0.0, +0.0, -0.0, +0.0 }) +#define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f }) +#define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f }) + +static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } +static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } +static INLINE vfloat vposneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(PNMASKf))); } +static INLINE vfloat vnegpos_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(NPMASKf))); } + +static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return _mm256_addsub_pd(x, y); } +static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return _mm256_addsub_ps(x, y); } + +static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vmla_vd_vd_vd_vd(x, y, vnegpos_vd_vd(z)); } +static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmla_vf_vf_vf_vf(x, y, vnegpos_vf_vf(z)); } + +static INLINE vdouble vrev21_vd_vd(vdouble d0) { return _mm256_shuffle_pd(d0, d0, (0 << 3) | (1 << 2) | (0 << 1) | (1 << 0)); } +static INLINE vdouble vreva2_vd_vd(vdouble d0) { d0 = _mm256_permute2f128_pd(d0, d0, 1); return _mm256_shuffle_pd(d0, d0, (1 << 3) | (0 << 2) | (1 << 1) | (0 << 0)); } + +static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { _mm256_stream_pd(ptr, v); } +static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { + _mm_store_pd(&ptr[(offset + step * 0)*2], _mm256_extractf128_pd(v, 0)); + _mm_store_pd(&ptr[(offset + step * 1)*2], _mm256_extractf128_pd(v, 1)); +} + +static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { + _mm_stream_pd(&ptr[(offset + step * 0)*2], _mm256_extractf128_pd(v, 0)); + _mm_stream_pd(&ptr[(offset + step * 1)*2], _mm256_extractf128_pd(v, 1)); +} + +// + +static INLINE vfloat vrev21_vf_vf(vfloat d0) { return _mm256_shuffle_ps(d0, d0, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); } +static INLINE vfloat vreva2_vf_vf(vfloat d0) { d0 = _mm256_permute2f128_ps(d0, d0, 1); return _mm256_shuffle_ps(d0, d0, (1 << 6) | (0 << 4) | (3 << 2) | (2 << 0)); } +static INLINE vint2 vrev21_vi2_vi2(vint2 i) { return vreinterpret_vi2_vf(vrev21_vf_vf(vreinterpret_vf_vi2(i))); } + +static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { _mm256_stream_ps(ptr, v); } + +static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { + _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 0)))); + _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), 
_mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 0)))); + _mm_storel_pd((double *)(ptr+(offset + step * 2)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 1)))); + _mm_storeh_pd((double *)(ptr+(offset + step * 3)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 1)))); +} + +static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); } + +// + +static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { + return (vmask2) { _mm256_unpacklo_epi64(v.x, v.y), _mm256_unpackhi_epi64(v.x, v.y) }; +} + +static INLINE vmask2 vuninterleave_vm2_vm2(vmask2 v) { + return (vmask2) { _mm256_unpacklo_epi64(v.x, v.y), _mm256_unpackhi_epi64(v.x, v.y) }; +} + +static INLINE vint vuninterleave_vi_vi(vint v) { + return _mm_shuffle_epi32(v, (0 << 0) | (2 << 2) | (1 << 4) | (3 << 6)); +} + +static INLINE vdouble vinterleave_vd_vd(vdouble vd) { + return vreinterpret_vd_vm(_mm256_permute4x64_epi64(vreinterpret_vm_vd(vd), (3 << 6) | (1 << 4) | (2 << 2) | (0 << 0))); +} + +static INLINE vdouble vuninterleave_vd_vd(vdouble vd) { + return vreinterpret_vd_vm(_mm256_permute4x64_epi64(vreinterpret_vm_vd(vd), (3 << 6) | (1 << 4) | (2 << 2) | (0 << 0))); +} + +static INLINE vmask vinterleave_vm_vm(vmask vm) { + return _mm256_permute4x64_epi64(vm, (3 << 6) | (1 << 4) | (2 << 2) | (0 << 0)); +} + +static INLINE vmask vuninterleave_vm_vm(vmask vm) { + return _mm256_permute4x64_epi64(vm, (3 << 6) | (1 << 4) | (2 << 2) | (0 << 0)); +} + +static vmask2 vloadu_vm2_p(void *p) { + vmask2 vm2; + memcpy(&vm2, p, VECTLENDP * 16); + return vm2; +} + +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad4 vargquad; + +static INLINE vmask2 vcast_vm2_aq(vargquad aq) { + return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); +} + +static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { + vm2 = vuninterleave_vm2_vm2(vm2); + vargquad aq; + memcpy(&aq, &vm2, VECTLENDP * 16); + return aq; +} +#endif // #if !defined(SLEEF_GENHEADER) + +static INLINE int vtestallzeros_i_vo64(vopmask g) { + return _mm_movemask_epi8(_mm_or_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))) == 0; +} + +static INLINE vmask vsel_vm_vo64_vm_vm(vopmask o, vmask x, vmask y) { return _mm256_blendv_epi8(y, x, o); } + +static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) { return _mm256_sub_epi64(x, y); } +static INLINE vmask vneg64_vm_vm(vmask x) { return _mm256_sub_epi64(vcast_vm_i_i(0, 0), x); } +static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { return _mm256_cmpgt_epi64(x, y); } // signed compare + +#define vsll64_vm_vm_i(x, c) _mm256_slli_epi64(x, c) +#define vsrl64_vm_vm_i(x, c) _mm256_srli_epi64(x, c) +//@#define vsll64_vm_vm_i(x, c) _mm256_slli_epi64(x, c) +//@#define vsrl64_vm_vm_i(x, c) _mm256_srli_epi64(x, c) + +static INLINE vmask vcast_vm_vi(vint vi) { return _mm256_cvtepi32_epi64(vi); } +static INLINE vint vcast_vi_vm(vmask vm) { + return _mm_or_si128(_mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm256_castsi256_si128(vm)), _mm_set1_ps(0), 0x08)), + _mm_castps_si128(_mm_shuffle_ps(_mm_set1_ps(0), _mm_castsi128_ps(_mm256_extractf128_si256(vm, 1)), 0x80))); +} diff --git a/src/helperavx512f.h b/src/helperavx512f.h new file mode 100644 index 00000000..7fdec168 --- /dev/null +++ b/src/helperavx512f.h @@ -0,0 +1,627 @@ +// Copyright Naoki Shibata and contributors 2010 - 2020. +// Distributed under the Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#if CONFIG == 1 || CONFIG == 2
+
+#if !defined(__AVX512F__) && !defined(SLEEF_GENHEADER)
+#error Please specify -mavx512f.
+#endif
+
+#else
+#error CONFIG macro invalid or not defined
+#endif
+
+#define ENABLE_DP
+//@#define ENABLE_DP
+#define LOG2VECTLENDP 3
+//@#define LOG2VECTLENDP 3
+#define VECTLENDP (1 << LOG2VECTLENDP)
+//@#define VECTLENDP (1 << LOG2VECTLENDP)
+
+#define ENABLE_SP
+//@#define ENABLE_SP
+#define LOG2VECTLENSP (LOG2VECTLENDP+1)
+//@#define LOG2VECTLENSP (LOG2VECTLENDP+1)
+#define VECTLENSP (1 << LOG2VECTLENSP)
+//@#define VECTLENSP (1 << LOG2VECTLENSP)
+
+#if CONFIG == 1
+#define ENABLE_FMA_DP
+//@#define ENABLE_FMA_DP
+#define ENABLE_FMA_SP
+//@#define ENABLE_FMA_SP
+#endif
+
+#define FULL_FP_ROUNDING
+//@#define FULL_FP_ROUNDING
+#define ACCURATE_SQRT
+//@#define ACCURATE_SQRT
+
+#if !defined(SLEEF_GENHEADER)
+#if defined(_MSC_VER)
+#include <intrin.h>
+#else
+#include <x86intrin.h>
+#endif
+
+#include <stdint.h>
+#include "misc.h"
+#endif // #if !defined(SLEEF_GENHEADER)
+
+typedef __m512i vmask;
+typedef __mmask16 vopmask;
+
+typedef __m512d vdouble;
+typedef __m256i vint;
+
+typedef __m512 vfloat;
+typedef __m512i vint2;
+
+typedef struct {
+  vmask x, y;
+} vmask2;
+
+//
+
+#if !defined(SLEEF_GENHEADER)
+
+#ifndef __SLEEF_H__
+static inline void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx) {
+  /* We don't care for cpuid detection */
+  out[0] = 0xFFFFFFFF;
+  out[1] = 0xFFFFFFFF;
+  out[2] = 0xFFFFFFFF;
+  out[3] = 0xFFFFFFFF;
+}
+#endif
+
+static INLINE int cpuSupportsAVX512F() {
+  int32_t reg[4];
+  Sleef_x86CpuID(reg, 7, 0);
+  return (reg[1] & (1 << 16)) != 0;
+}
+
+#if CONFIG == 1 && defined(__AVX512F__)
+static INLINE int vavailability_i(int name) {
+  int d = cpuSupportsAVX512F();
+  return d ? 3 : 0;
+}
+#define ISANAME "AVX512F"
+#define DFTPRIORITY 30
+#endif
+
+#if CONFIG == 2 && defined(__AVX512F__)
+static INLINE int vavailability_i(int name) {
+  int d = cpuSupportsAVX512F();
+  return d ?
3 : 0; +} +#define ISANAME "AVX512FNOFMA" +#define DFTPRIORITY 0 +#endif + +#endif // #if !defined(SLEEF_GENHEADER) + +static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } + +#ifdef __INTEL_COMPILER +static INLINE int vtestallones_i_vo64(vopmask g) { return _mm512_mask2int(g) == 0xff; } +static INLINE int vtestallones_i_vo32(vopmask g) { return _mm512_mask2int(g) == 0xffff; } +#else +static INLINE int vtestallones_i_vo64(vopmask g) { return g == 0xff; } +static INLINE int vtestallones_i_vo32(vopmask g) { return g == 0xffff; } +#endif + +// + +static vint2 vloadu_vi2_p(int32_t *p) { return _mm512_loadu_si512((__m512i const *)p); } +static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm512_storeu_si512((__m512i *)p, v); } +static vint vloadu_vi_p(int32_t *p) { return _mm256_loadu_si256((__m256i const *)p); } +static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm256_storeu_si256((__m256i *)p, v); } + +// + +static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return _mm512_and_si512(x, y); } +static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return _mm512_andnot_si512(x, y); } +static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return _mm512_or_si512(x, y); } +static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return _mm512_xor_si512(x, y); } + +static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return _mm512_kand(x, y); } +static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return _mm512_kandn(x, y); } +static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return _mm512_kor(x, y); } +static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return _mm512_kxor(x, y); } + +static INLINE vmask vand_vm_vo64_vm(vopmask o, vmask m) { return _mm512_mask_and_epi64(_mm512_set1_epi32(0), o, m, m); } +static INLINE vmask vandnot_vm_vo64_vm(vopmask o, vmask m) { return _mm512_mask_and_epi64(m, o, _mm512_set1_epi32(0), _mm512_set1_epi32(0)); } +static INLINE vmask vor_vm_vo64_vm(vopmask o, vmask m) { return _mm512_mask_or_epi64(m, o, _mm512_set1_epi32(-1), _mm512_set1_epi32(-1)); } + +static INLINE vmask vand_vm_vo32_vm(vopmask o, vmask m) { return _mm512_mask_and_epi32(_mm512_set1_epi32(0), o, m, m); } +static INLINE vmask vandnot_vm_vo32_vm(vopmask o, vmask m) { return _mm512_mask_and_epi32(m, o, _mm512_set1_epi32(0), _mm512_set1_epi32(0)); } +static INLINE vmask vor_vm_vo32_vm(vopmask o, vmask m) { return _mm512_mask_or_epi32(m, o, _mm512_set1_epi32(-1), _mm512_set1_epi32(-1)); } + +static INLINE vopmask vcast_vo32_vo64(vopmask o) { return o; } +static INLINE vopmask vcast_vo64_vo32(vopmask o) { return o; } + +// + +static INLINE vint vrint_vi_vd(vdouble vd) { + return _mm512_cvt_roundpd_epi32(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); +} + +static INLINE vint vtruncate_vi_vd(vdouble vd) { + return _mm512_cvt_roundpd_epi32(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); +} + +static INLINE vdouble vcast_vd_vi(vint vi) { return _mm512_cvtepi32_pd(vi); } +static INLINE vint vcast_vi_i(int i) { return _mm256_set1_epi32(i); } + +static INLINE vdouble vtruncate_vd_vd(vdouble vd) { + return _mm512_roundscale_pd(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); +} + +static INLINE vdouble vrint_vd_vd(vdouble vd) { + return _mm512_roundscale_pd(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); +} + +static INLINE vint2 vcastu_vi2_vi(vint vi) { + return _mm512_maskz_permutexvar_epi32(0xaaaa, _mm512_set_epi32(7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0), _mm512_castsi256_si512(vi)); +} + +static INLINE vint vcastu_vi_vi2(vint2 vi) { + 
return _mm512_castsi512_si256(_mm512_maskz_permutexvar_epi32(0x00ff, _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 15, 13, 11, 9, 7, 5, 3, 1), vi)); +} + +static INLINE vmask vcast_vm_i_i(int i0, int i1) { return _mm512_set_epi32(i0, i1, i0, i1, i0, i1, i0, i1, i0, i1, i0, i1, i0, i1, i0, i1); } + +static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return _mm512_cmp_epi64_mask(x, y, _MM_CMPINT_EQ); } +static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return _mm512_add_epi64(x, y); } + +// + +static INLINE vdouble vcast_vd_d(double d) { return _mm512_set1_pd(d); } +static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return _mm512_castpd_si512(vd); } +static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return _mm512_castsi512_pd(vm); } +static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { return _mm512_castpd_si512(vd); } +static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) { return _mm512_castsi512_pd(vi); } + +static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm512_add_pd(x, y); } +static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return _mm512_sub_pd(x, y); } +static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm512_mul_pd(x, y); } +static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return _mm512_div_pd(x, y); } +static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm512_div_pd(_mm512_set1_pd(1), x); } +static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm512_sqrt_pd(x); } +static INLINE vdouble vabs_vd_vd(vdouble d) { return vreinterpret_vd_vm(_mm512_andnot_si512(vreinterpret_vm_vd(_mm512_set1_pd(-0.0)), vreinterpret_vm_vd(d))); } +static INLINE vdouble vneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(_mm512_xor_si512(vreinterpret_vm_vd(_mm512_set1_pd(-0.0)), vreinterpret_vm_vd(d))); } +static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm512_max_pd(x, y); } +static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm512_min_pd(x, y); } + +#if CONFIG == 1 +static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmadd_pd(x, y, z); } +static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmsub_pd(x, y, z); } +static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fnmadd_pd(x, y, z); } +#else +static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } +static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } +#endif + +static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmadd_pd(x, y, z); } +static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmadd_pd(x, y, z); } +static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmsub_pd(x, y, z); } +static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fnmadd_pd(x, y, z); } +static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fnmsub_pd(x, y, z); } + +static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return _mm512_cmp_pd_mask(x, y, _CMP_EQ_OQ); } +static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return _mm512_cmp_pd_mask(x, y, _CMP_NEQ_UQ); } +static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return _mm512_cmp_pd_mask(x, y, _CMP_LT_OQ); } +static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return 
_mm512_cmp_pd_mask(x, y, _CMP_LE_OQ); } +static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return _mm512_cmp_pd_mask(x, y, _CMP_GT_OQ); } +static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return _mm512_cmp_pd_mask(x, y, _CMP_GE_OQ); } + +// + +static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm256_add_epi32(x, y); } +static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm256_sub_epi32(x, y); } +static INLINE vint vneg_vi_vi(vint e) { return vsub_vi_vi_vi(vcast_vi_i(0), e); } + +static INLINE vint vand_vi_vi_vi(vint x, vint y) { return _mm256_and_si256(x, y); } +static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm256_andnot_si256(x, y); } + +static INLINE vint vandnot_vi_vo_vi(vopmask o, vint y) { + return _mm512_castsi512_si256(_mm512_mask_and_epi32(_mm512_castsi256_si512(y), o, _mm512_set1_epi32(0), _mm512_set1_epi32(0))); +} +static INLINE vint vand_vi_vo_vi(vopmask o, vint y) { + return _mm512_castsi512_si256(_mm512_mask_and_epi32(_mm512_set1_epi32(0), o, _mm512_castsi256_si512(y), _mm512_castsi256_si512(y))); +} + +static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm256_or_si256(x, y); } +static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm256_xor_si256(x, y); } +#define vsll_vi_vi_i(x, c) _mm256_slli_epi32(x, c) +#define vsrl_vi_vi_i(x, c) _mm256_srli_epi32(x, c) +#define vsra_vi_vi_i(x, c) _mm256_srai_epi32(x, c) +//@#define vsll_vi_vi_i(x, c) _mm256_slli_epi32(x, c) +//@#define vsrl_vi_vi_i(x, c) _mm256_srli_epi32(x, c) +//@#define vsra_vi_vi_i(x, c) _mm256_srai_epi32(x, c) + +static INLINE vint veq_vi_vi_vi(vint x, vint y) { return _mm256_cmpeq_epi32(x, y); } +static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm256_cmpgt_epi32(x, y); } + +static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { + return _mm512_cmp_epi32_mask(_mm512_castsi256_si512(x), _mm512_castsi256_si512(y), _MM_CMPINT_EQ); +} +static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { + return _mm512_cmp_epi32_mask(_mm512_castsi256_si512(y), _mm512_castsi256_si512(x), _MM_CMPINT_LT); +} + +static INLINE vdouble vsel_vd_vo_vd_vd(vopmask mask, vdouble x, vdouble y) { + return _mm512_mask_blend_pd(mask, y, x); +} + +static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { + return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0)); +} + +#if 1 +// Probably this is faster +static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { + __m512i v = _mm512_castpd_si512(vsel_vd_vo_vd_vd(o0, _mm512_castsi512_pd(_mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0)), + vsel_vd_vo_vd_vd(o1, _mm512_castsi512_pd(_mm512_set_epi64(1, 1, 1, 1, 1, 1, 1, 1)), + vsel_vd_vo_vd_vd(o2, _mm512_castsi512_pd(_mm512_set_epi64(2, 2, 2, 2, 2, 2, 2, 2)), + _mm512_castsi512_pd(_mm512_set_epi64(3, 3, 3, 3, 3, 3, 3, 3)))))); + return _mm512_permutexvar_pd(v, _mm512_castpd256_pd512(_mm256_set_pd(d3, d2, d1, d0))); +} + +static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { + return vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o1, d0, d1, d2, d2); +} +#else +static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { + return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2)); +} + +static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { + return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3))); 
+} +#endif + +static INLINE vopmask visinf_vo_vd(vdouble d) { + return _mm512_cmp_pd_mask(vabs_vd_vd(d), _mm512_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ); +} + +static INLINE vopmask vispinf_vo_vd(vdouble d) { + return _mm512_cmp_pd_mask(d, _mm512_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ); +} + +static INLINE vopmask visminf_vo_vd(vdouble d) { + return _mm512_cmp_pd_mask(d, _mm512_set1_pd(-SLEEF_INFINITY), _CMP_EQ_OQ); +} + +static INLINE vopmask visnan_vo_vd(vdouble d) { + return _mm512_cmp_pd_mask(d, d, _CMP_NEQ_UQ); +} + +static INLINE vint vilogbk_vi_vd(vdouble d) { return vrint_vi_vd(_mm512_getexp_pd(d)); } + +// vilogb2k_vi_vd is similar to vilogbk_vi_vd, but the argument has to +// be a normalized FP value. +static INLINE vint vilogb2k_vi_vd(vdouble d) { return vrint_vi_vd(_mm512_getexp_pd(d)); } + +static INLINE vdouble vgetexp_vd_vd(vdouble d) { return _mm512_getexp_pd(d); } +static INLINE vfloat vgetexp_vf_vf(vfloat d) { return _mm512_getexp_ps(d); } + +static INLINE vdouble vgetmant_vd_vd(vdouble d) { return _mm512_getmant_pd(d, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_nan); } +static INLINE vfloat vgetmant_vf_vf(vfloat d) { return _mm512_getmant_ps(d, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_nan); } + +#define vfixup_vd_vd_vd_vi2_i(a, b, c, imm) _mm512_fixupimm_pd((a), (b), (c), (imm)) +#define vfixup_vf_vf_vf_vi2_i(a, b, c, imm) _mm512_fixupimm_ps((a), (b), (c), (imm)) +//@#define vfixup_vd_vd_vd_vi2_i(a, b, c, imm) _mm512_fixupimm_pd((a), (b), (c), (imm)) +//@#define vfixup_vf_vf_vf_vi2_i(a, b, c, imm) _mm512_fixupimm_ps((a), (b), (c), (imm)) + +#if defined(_MSC_VER) +// This function is needed when debugging on MSVC. +static INLINE double vcast_d_vd(vdouble v) { + double s[VECTLENDP]; + _mm512_storeu_pd(s, v); + return s[0]; +} +#endif + +static INLINE vdouble vload_vd_p(const double *ptr) { return _mm512_load_pd(ptr); } +static INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm512_loadu_pd(ptr); } + +static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { _mm512_store_pd(ptr, v); } +static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { _mm512_storeu_pd(ptr, v); } + +static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { return _mm512_i32gather_pd(vi, ptr, 8); } + +// + +static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { + return _mm512_castsi512_si256(_mm512_mask_blend_epi32(m, _mm512_castsi256_si512(y), _mm512_castsi256_si512(x))); +} + +// + +static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return _mm512_castps_si512(vf); } +static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return _mm512_castsi512_ps(vm); } +static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { return _mm512_castsi512_ps(vi); } +static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return _mm512_castps_si512(vf); } + +static INLINE vdouble vreinterpret_vd_vf(vfloat vf) { return _mm512_castps_pd(vf); } +static INLINE vfloat vreinterpret_vf_vd(vdouble vd) { return _mm512_castpd_ps(vd); } + +static INLINE vint2 vcast_vi2_vm(vmask vm) { return vm; } +static INLINE vmask vcast_vm_vi2(vint2 vi) { return vi; } + +static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm512_cvtepi32_ps(vcast_vm_vi2(vi)); } +static INLINE vfloat vcast_vf_f(float f) { return _mm512_set1_ps(f); } +static INLINE vint2 vcast_vi2_i(int i) { return _mm512_set1_epi32(i); } +static INLINE vint2 vrint_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm512_cvtps_epi32(vf)); } +static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm512_cvttps_epi32(vf)); } + +static INLINE vfloat vtruncate_vf_vf(vfloat 
vd) { + return _mm512_roundscale_ps(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); +} + +static INLINE vfloat vrint_vf_vf(vfloat vd) { + return _mm512_roundscale_ps(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); +} + +static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return _mm512_add_ps(x, y); } +static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm512_sub_ps(x, y); } +static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm512_mul_ps(x, y); } +static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm512_div_ps(x, y); } +static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); } +static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm512_sqrt_ps(x); } +static INLINE vfloat vabs_vf_vf(vfloat f) { return vreinterpret_vf_vm(vandnot_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))); } +static INLINE vfloat vneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(d))); } +static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm512_max_ps(x, y); } +static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm512_min_ps(x, y); } + +#if CONFIG == 1 +static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmadd_ps(x, y, z); } +static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmsub_ps(x, y, z); } +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fnmadd_ps(x, y, z); } +#else +static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); } +static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } +#endif + +static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmadd_ps(x, y, z); } +static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmadd_ps(x, y, z); } +static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmsub_ps(x, y, z); } +static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fnmadd_ps(x, y, z); } +static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fnmsub_ps(x, y, z); } + +static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_EQ_OQ); } +static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_NEQ_UQ); } +static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_LT_OQ); } +static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_LE_OQ); } +static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_GT_OQ); } +static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_GE_OQ); } + +static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm512_add_epi32(x, y); } +static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm512_sub_epi32(x, y); } +static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vsub_vi2_vi2_vi2(vcast_vi2_i(0), e); } +static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm512_and_si512(x, y); } +static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return 
_mm512_andnot_si512(x, y); } +static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm512_or_si512(x, y); } +static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm512_xor_si512(x, y); } + +static INLINE vint2 vand_vi2_vo_vi2(vopmask o, vint2 m) { + return _mm512_mask_and_epi32(_mm512_set1_epi32(0), o, m, m); +} + +static INLINE vint2 vandnot_vi2_vo_vi2(vopmask o, vint2 m) { + return _mm512_mask_and_epi32(m, o, _mm512_set1_epi32(0), _mm512_set1_epi32(0)); +} + +#define vsll_vi2_vi2_i(x, c) _mm512_slli_epi32(x, c) +#define vsrl_vi2_vi2_i(x, c) _mm512_srli_epi32(x, c) +#define vsra_vi2_vi2_i(x, c) _mm512_srai_epi32(x, c) +//@#define vsll_vi2_vi2_i(x, c) _mm512_slli_epi32(x, c) +//@#define vsrl_vi2_vi2_i(x, c) _mm512_srli_epi32(x, c) +//@#define vsra_vi2_vi2_i(x, c) _mm512_srai_epi32(x, c) +static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return _mm512_cmpeq_epi32_mask(x, y); } +static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return _mm512_cmpgt_epi32_mask(x, y); } + +static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { + __mmask16 m = _mm512_cmp_epi32_mask(x, y, _MM_CMPINT_EQ); + return _mm512_mask_and_epi32(_mm512_set1_epi32(0), m, _mm512_set1_epi32(-1), _mm512_set1_epi32(-1)); +} +static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { + __mmask16 m = _mm512_cmp_epi32_mask(y, x, _MM_CMPINT_LT); + return _mm512_mask_and_epi32(_mm512_set1_epi32(0), m, _mm512_set1_epi32(-1), _mm512_set1_epi32(-1)); +} + +static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { + return _mm512_mask_blend_epi32(m, y, x); +} + +static INLINE vfloat vsel_vf_vo_vf_vf(vopmask m, vfloat x, vfloat y) { + return _mm512_mask_blend_ps(m, y, x); +} + +// At this point, the following three functions are implemented in a generic way, +// but I will try target-specific optimization later on. +static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { + return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); +} + +static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { + return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); +} + +static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { + return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); +} + +static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); } +static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); } +static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); } +static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } + +static INLINE vint2 vilogbk_vi2_vf(vfloat d) { return vrint_vi2_vf(_mm512_getexp_ps(d)); } +static INLINE vint2 vilogb2k_vi2_vf(vfloat d) { return vrint_vi2_vf(_mm512_getexp_ps(d)); } + +#ifdef _MSC_VER +// This function is needed when debugging on MSVC. 
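+// AVX512 variant of the same debugging aid: all VECTLENSP (16) f32 lanes
+// are spilled to memory and lane 0 is returned.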
+static INLINE float vcast_f_vf(vfloat v) { + float s[VECTLENSP]; + _mm512_storeu_ps(s, v); + return s[0]; +} +#endif + +static INLINE vfloat vload_vf_p(const float *ptr) { return _mm512_load_ps(ptr); } +static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm512_loadu_ps(ptr); } + +static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { _mm512_store_ps(ptr, v); } +static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { _mm512_storeu_ps(ptr, v); } + +static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { return _mm512_i32gather_ps(vi2, ptr, 4); } + +// + +static INLINE vdouble vposneg_vd_vd(vdouble d) { + return vreinterpret_vd_vm(_mm512_mask_xor_epi32(vreinterpret_vm_vd(d), 0xcccc, vreinterpret_vm_vd(d), vreinterpret_vm_vd(_mm512_set1_pd(-0.0)))); +} +static INLINE vdouble vnegpos_vd_vd(vdouble d) { + return vreinterpret_vd_vm(_mm512_mask_xor_epi32(vreinterpret_vm_vd(d), 0x3333, vreinterpret_vm_vd(d), vreinterpret_vm_vd(_mm512_set1_pd(-0.0)))); +} +static INLINE vfloat vposneg_vf_vf(vfloat d) { + return vreinterpret_vf_vm(_mm512_mask_xor_epi32(vreinterpret_vm_vf(d), 0xaaaa, vreinterpret_vm_vf(d), vreinterpret_vm_vf(_mm512_set1_ps(-0.0f)))); +} +static INLINE vfloat vnegpos_vf_vf(vfloat d) { + return vreinterpret_vf_vm(_mm512_mask_xor_epi32(vreinterpret_vm_vf(d), 0x5555, vreinterpret_vm_vf(d), vreinterpret_vm_vf(_mm512_set1_ps(-0.0f)))); +} + +static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return vadd_vd_vd_vd(x, vnegpos_vd_vd(y)); } +static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return vadd_vf_vf_vf(x, vnegpos_vf_vf(y)); } + +static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmaddsub_pd(x, y, z); } +static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmaddsub_ps(x, y, z); } + +static INLINE vdouble vrev21_vd_vd(vdouble vd) { return _mm512_permute_pd(vd, 0x55); } + +static INLINE vdouble vreva2_vd_vd(vdouble vd) { + return vreinterpret_vd_vm(_mm512_permutexvar_epi32(_mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12), vreinterpret_vm_vd(vd))); +} + +static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { _mm512_stream_pd(ptr, v); } + +static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { + _mm_store_pd(&ptr[(offset + step * 0)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 0))); + _mm_store_pd(&ptr[(offset + step * 1)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 1))); + _mm_store_pd(&ptr[(offset + step * 2)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 2))); + _mm_store_pd(&ptr[(offset + step * 3)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 3))); +} + +static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { + _mm_stream_pd(&ptr[(offset + step * 0)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 0))); + _mm_stream_pd(&ptr[(offset + step * 1)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 1))); + _mm_stream_pd(&ptr[(offset + step * 2)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 2))); + _mm_stream_pd(&ptr[(offset + step * 3)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 3))); +} + +// + +static INLINE vfloat vrev21_vf_vf(vfloat vf) { return _mm512_permute_ps(vf, 0xb1); } +static INLINE vint2 vrev21_vi2_vi2(vint2 i) { return vreinterpret_vi2_vf(vrev21_vf_vf(vreinterpret_vf_vi2(i))); } + +static INLINE vfloat vreva2_vf_vf(vfloat vf) 
{ + return vreinterpret_vf_vm(_mm512_permutexvar_epi32(_mm512_set_epi32(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14), vreinterpret_vm_vf(vf))); +} + +static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { _mm512_stream_ps(ptr, v); } + +static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { + _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 0))); + _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 0))); + _mm_storel_pd((double *)(ptr+(offset + step * 2)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 1))); + _mm_storeh_pd((double *)(ptr+(offset + step * 3)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 1))); + _mm_storel_pd((double *)(ptr+(offset + step * 4)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 2))); + _mm_storeh_pd((double *)(ptr+(offset + step * 5)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 2))); + _mm_storel_pd((double *)(ptr+(offset + step * 6)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 3))); + _mm_storeh_pd((double *)(ptr+(offset + step * 7)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 3))); +} + +static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); } + +// + +static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { + return (vmask2) { _mm512_unpacklo_epi64(v.x, v.y), _mm512_unpackhi_epi64(v.x, v.y) }; +} + +static INLINE vmask2 vuninterleave_vm2_vm2(vmask2 v) { + return (vmask2) { _mm512_unpacklo_epi64(v.x, v.y), _mm512_unpackhi_epi64(v.x, v.y) }; +} + +static INLINE vint vuninterleave_vi_vi(vint v) { + return _mm256_permutevar8x32_epi32(v, _mm256_set_epi32(7, 5, 3, 1, 6, 4, 2, 0)); +} + +static INLINE vdouble vinterleave_vd_vd(vdouble vd) { + return vreinterpret_vd_vm(_mm512_permutexvar_epi32(_mm512_set_epi32(15, 14, 7, 6, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0), vreinterpret_vm_vd(vd))); +} + +static INLINE vdouble vuninterleave_vd_vd(vdouble vd) { + return vreinterpret_vd_vm(_mm512_permutexvar_epi32(_mm512_set_epi32(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0), vreinterpret_vm_vd(vd))); +} + +static INLINE vmask vinterleave_vm_vm(vmask vm) { + return _mm512_permutexvar_epi32(_mm512_set_epi32(15, 14, 7, 6, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0), vm); +} + +static INLINE vmask vuninterleave_vm_vm(vmask vm) { + return _mm512_permutexvar_epi32(_mm512_set_epi32(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0), vm); +} + +static vmask2 vloadu_vm2_p(void *p) { + vmask2 vm2; + memcpy(&vm2, p, VECTLENDP * 16); + return vm2; +} + +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad8 vargquad; + +static INLINE vmask2 vcast_vm2_aq(vargquad aq) { + return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); +} + +static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { + vm2 = vuninterleave_vm2_vm2(vm2); + vargquad aq; + memcpy(&aq, &vm2, VECTLENDP * 16); + return aq; +} +#endif // #if !defined(SLEEF_GENHEADER) + +#ifdef __INTEL_COMPILER +static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm512_mask2int(g) == 0; } +#else +static INLINE int vtestallzeros_i_vo64(vopmask g) { return g == 0; } +#endif + +static INLINE vmask vsel_vm_vo64_vm_vm(vopmask m, vmask x, vmask y) { return _mm512_mask_blend_epi64(m, y, x); } + +static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) { return _mm512_sub_epi64(x, y); } +static INLINE vmask vneg64_vm_vm(vmask x) { return _mm512_sub_epi64(vcast_vm_i_i(0, 0), x); } +static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { return _mm512_cmp_epi64_mask(y, x, 
_MM_CMPINT_LT); } // signed compare
+
+#define vsll64_vm_vm_i(x, c) _mm512_slli_epi64(x, c)
+#define vsrl64_vm_vm_i(x, c) _mm512_srli_epi64(x, c)
+//@#define vsll64_vm_vm_i(x, c) _mm512_slli_epi64(x, c)
+//@#define vsrl64_vm_vm_i(x, c) _mm512_srli_epi64(x, c)
+
+static INLINE vmask vcast_vm_vi(vint vi) {
+  return _mm512_cvtepi32_epi64(vi);
+}
+static INLINE vint vcast_vi_vm(vmask vm) {
+  return _mm512_cvtepi64_epi32(vm);
+}
diff --git a/src/helperneon32.h b/src/helperneon32.h
new file mode 100644
index 00000000..ccbafd74
--- /dev/null
+++ b/src/helperneon32.h
@@ -0,0 +1,298 @@
+// Copyright Naoki Shibata and contributors 2010 - 2020.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef __ARM_NEON
+#error Please specify -mfpu=neon.
+#endif
+
+#ifdef __aarch64__
+#warning This implementation is for AARCH32.
+#endif
+
+#define ENABLE_SP
+//@#define ENABLE_SP
+#define LOG2VECTLENSP 2
+//@#define LOG2VECTLENSP 2
+#define VECTLENSP (1 << LOG2VECTLENSP)
+//@#define VECTLENSP (1 << LOG2VECTLENSP)
+
+#if CONFIG == 4
+#define ISANAME "AARCH32 NEON-VFPV4"
+#define ENABLE_FMA_SP
+//@#define ENABLE_FMA_SP
+#else
+#define ISANAME "AARCH32 NEON"
+#endif
+#define DFTPRIORITY 10
+
+#define ENABLE_RECSQRT_SP
+//@#define ENABLE_RECSQRT_SP
+
+#include <arm_neon.h>
+#include <stdint.h>
+
+#include "misc.h"
+
+typedef uint32x4_t vmask;
+typedef uint32x4_t vopmask;
+
+//typedef int32x4_t vint;
+
+typedef float32x4_t vfloat;
+typedef int32x4_t vint2;
+
+//
+
+static INLINE void vprefetch_v_p(const void *ptr) { }
+
+static INLINE int vtestallones_i_vo32(vopmask g) {
+  uint32x2_t x0 = vand_u32(vget_low_u32(g), vget_high_u32(g));
+  uint32x2_t x1 = vpmin_u32(x0, x0);
+  return vget_lane_u32(x1, 0);
+}
+
+static vfloat vloaduf(float *p) { return vld1q_f32(p); }
+static void vstoreuf(float *p, vfloat v) { vst1q_f32(p, v); }
+
+static vint2 vloadu_vi2_p(int32_t *p) { return vld1q_s32(p); }
+static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { vst1q_s32(p, v); }
+
+//
+
+static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return vandq_u32(x, y); }
+static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return vbicq_u32(y, x); }
+static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return vorrq_u32(x, y); }
+static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return veorq_u32(x, y); }
+
+static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return vandq_u32(x, y); }
+static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return vbicq_u32(y, x); }
+static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return vorrq_u32(x, y); }
+static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return veorq_u32(x, y); }
+
+static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return vandq_u32(x, y); }
+static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return vbicq_u32(y, x); }
+static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return vorrq_u32(x, y); }
+static INLINE vmask vxor_vm_vo64_vm(vopmask x, vmask y) { return veorq_u32(x, y); }
+
+static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return vandq_u32(x, y); }
+static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return vbicq_u32(y, x); }
+static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return vorrq_u32(x, y); }
+static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) { return veorq_u32(x, y); }
+
+static INLINE vopmask vcast_vo32_vo64(vopmask m) { return vuzpq_u32(m, m).val[0]; }
+static INLINE vopmask
vcast_vo64_vo32(vopmask m) { return vzipq_u32(m, m).val[0]; } + +// + +static INLINE vmask vcast_vm_i_i(int i0, int i1) { return (vmask)vdupq_n_u64((uint64_t)i0 | (((uint64_t)i1) << 32)); } +static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { + uint32x4_t t = vceqq_u32(x, y); + return vandq_u32(t, vrev64q_u32(t)); +} + +// + +static INLINE vint2 vcast_vi2_vm(vmask vm) { return (vint2)vm; } +static INLINE vmask vcast_vm_vi2(vint2 vi) { return (vmask)vi; } +static INLINE vint2 vrint_vi2_vf(vfloat d) { + return vcvtq_s32_f32(vaddq_f32(d, (float32x4_t)vorrq_u32(vandq_u32((uint32x4_t)d, (uint32x4_t)vdupq_n_f32(-0.0f)), (uint32x4_t)vdupq_n_f32(0.5f)))); +} +static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcvtq_s32_f32(vf); } +static INLINE vfloat vcast_vf_vi2(vint2 vi) { return vcvtq_f32_s32(vi); } + +static INLINE vfloat vtruncate_vf_vf(vfloat vd) { return vcast_vf_vi2(vtruncate_vi2_vf(vd)); } +static INLINE vfloat vrint_vf_vf(vfloat vd) { return vcast_vf_vi2(vrint_vi2_vf(vd)); } + +static INLINE vfloat vcast_vf_f(float f) { return vdupq_n_f32(f); } +static INLINE vint2 vcast_vi2_i(int i) { return vdupq_n_s32(i); } +static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return (vmask)vf; } +static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return (vfloat)vm; } +static INLINE vfloat vreinterpret_vf_vi2(vint2 vm) { return (vfloat)vm; } +static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return (vint2)vf; } + +static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return vaddq_f32(x, y); } +static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return vsubq_f32(x, y); } +static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return vmulq_f32(x, y); } + +static INLINE vfloat vabs_vf_vf(vfloat f) { return vabsq_f32(f); } +static INLINE vfloat vneg_vf_vf(vfloat f) { return vnegq_f32(f); } +#if CONFIG == 4 +static INLINE vfloat vmla_vf_vf_vf_vf (vfloat x, vfloat y, vfloat z) { return vfmaq_f32(z, x, y); } +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vfmsq_f32(z, x, y); } +static INLINE vfloat vfma_vf_vf_vf_vf (vfloat x, vfloat y, vfloat z) { return vfmaq_f32(z, x, y); } +static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vfmsq_f32(z, x, y); } +static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vneg_vf_vf(vfmanp_vf_vf_vf_vf(x, y, z)); } +static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vneg_vf_vf(vfmanp_vf_vf_vf_vf(x, y, z)); } + +static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { + float32x4_t t = vrecpeq_f32(y), u; + t = vmulq_f32(t, vrecpsq_f32(y, t)); + t = vfmaq_f32(t, vfmsq_f32(vdupq_n_f32(1.0f), y, t), t); + u = vmulq_f32(x, t); + return vfmaq_f32(u, vfmsq_f32(x, y, u), t); +} + +static INLINE vfloat vsqrt_vf_vf(vfloat d) { + float32x4_t x = vrsqrteq_f32(d); + x = vmulq_f32(x, vrsqrtsq_f32(d, vmulq_f32(x, x))); + x = vmulq_f32(x, vrsqrtsq_f32(d, vmulq_f32(x, x))); + float32x4_t u = vmulq_f32(x, d); + u = vfmaq_f32(u, vfmsq_f32(d, u, u), vmulq_f32(x, vdupq_n_f32(0.5))); + return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(u), vceqq_f32(d, vdupq_n_f32(0.0f)))); +} + +static INLINE vfloat vrec_vf_vf(vfloat y) { + float32x4_t t = vrecpeq_f32(y), u; + t = vmulq_f32(t, vrecpsq_f32(y, t)); + t = vfmaq_f32(t, vfmsq_f32(vdupq_n_f32(1.0f), y, t), t); + return vfmaq_f32(t, vfmsq_f32(vdupq_n_f32(1.0f), y, t), t); +} + +static INLINE vfloat vrecsqrt_vf_vf(vfloat d) { + float32x4_t x = vrsqrteq_f32(d); + x = vmulq_f32(x, vrsqrtsq_f32(d, vmulq_f32(x, 
x))); + return vfmaq_f32(x, vfmsq_f32(vdupq_n_f32(1), x, vmulq_f32(x, d)), vmulq_f32(x, vdupq_n_f32(0.5))); +} +#else // #if CONFIG == 4 +static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmlaq_f32(z, x, y); } +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmlsq_f32(z, x, y); } +static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vneg_vf_vf(vmlsq_f32(z, x, y)); } + +static INLINE vfloat vdiv_vf_vf_vf(vfloat n, vfloat d) { + float32x4_t x = vrecpeq_f32(d); + x = vmulq_f32(x, vrecpsq_f32(d, x)); + float32x4_t t = vmulq_f32(n, x); + return vmlsq_f32(vaddq_f32(t, t), vmulq_f32(t, x), d); +} + +static INLINE vfloat vsqrt_vf_vf(vfloat d) { + float32x4_t x = vrsqrteq_f32(d); + x = vmulq_f32(x, vrsqrtsq_f32(d, vmulq_f32(x, x))); + float32x4_t u = vmulq_f32(x, d); + u = vmlaq_f32(u, vmlsq_f32(d, u, u), vmulq_f32(x, vdupq_n_f32(0.5))); + return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(u), vceqq_f32(d, vdupq_n_f32(0.0f)))); +} + +static INLINE vfloat vrec_vf_vf(vfloat d) { + float32x4_t x = vrecpeq_f32(d); + x = vmulq_f32(x, vrecpsq_f32(d, x)); + return vmlsq_f32(vaddq_f32(x, x), vmulq_f32(x, x), d); +} + +static INLINE vfloat vrecsqrt_vf_vf(vfloat d) { + float32x4_t x = vrsqrteq_f32(d); + x = vmulq_f32(x, vrsqrtsq_f32(d, vmulq_f32(x, x))); + return vmlaq_f32(x, vmlsq_f32(vdupq_n_f32(1), x, vmulq_f32(x, d)), vmulq_f32(x, vdupq_n_f32(0.5))); +} +#endif // #if CONFIG == 4 +static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return vmaxq_f32(x, y); } +static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return vminq_f32(x, y); } + +static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return vceqq_f32(x, y); } +static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return vmvnq_u32(vceqq_f32(x, y)); } +static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return vcltq_f32(x, y); } +static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return vcleq_f32(x, y); } +static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return vcgtq_f32(x, y); } +static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return vcgeq_f32(x, y); } + +static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return vaddq_s32(x, y); } +static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return vsubq_s32(x, y); } +static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vnegq_s32(e); } + +static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return vandq_s32(x, y); } +static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return vbicq_s32(y, x); } +static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return vorrq_s32(x, y); } +static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return veorq_s32(x, y); } + +static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return (vint2)vandq_u32(x, (vopmask)y); } +static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return (vint2)vbicq_u32((vopmask)y, x); } + +#define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c) +#define vsrl_vi2_vi2_i(x, c) vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c)) +#define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c) +//@#define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c) +//@#define vsrl_vi2_vi2_i(x, c) vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c)) +//@#define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c) + +static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return vceqq_s32(x, y); } +static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return vcgtq_s32(x, y); } +static INLINE vint2 
veq_vi2_vi2_vi2(vint2 x, vint2 y) { return (vint2)vceqq_s32(x, y); } +static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return (vint2)vcgtq_s32(x, y); } + +static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { return (vint2)vbslq_u32(m, (vmask)x, (vmask)y); } + +static INLINE vfloat vsel_vf_vo_vf_vf(vopmask mask, vfloat x, vfloat y) { + return (vfloat)vbslq_u32(mask, (vmask)x, (vmask)y); +} + +static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { + return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); +} + +static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { + return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); +} + +static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { + return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); +} + +static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); } +static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); } +static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); } +static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } + +// This function is needed when debugging on MSVC. +static INLINE float vcast_f_vf(vfloat v) { + float p[4]; + vst1q_f32 (p, v); + return p[0]; +} + +static INLINE int vavailability_i(int name) { + if (name != 2) return 0; + return vcast_f_vf(vadd_vf_vf_vf(vcast_vf_f(name), vcast_vf_f(name))) != 0.0; +} + + +static INLINE vfloat vload_vf_p(const float *ptr) { return vld1q_f32(__builtin_assume_aligned(ptr, 16)); } +static INLINE vfloat vloadu_vf_p(const float *ptr) { return vld1q_f32(ptr); } + +static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { vst1q_f32(__builtin_assume_aligned(ptr, 16), v); } +static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { vst1q_f32(ptr, v); } + +static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { + return ((vfloat) { + ptr[vgetq_lane_s32(vi2, 0)], + ptr[vgetq_lane_s32(vi2, 1)], + ptr[vgetq_lane_s32(vi2, 2)], + ptr[vgetq_lane_s32(vi2, 3)] + }); +} + +#define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f }) +#define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f }) + +static INLINE vfloat vposneg_vf_vf(vfloat d) { return (vfloat)vxor_vm_vm_vm((vmask)d, (vmask)PNMASKf); } +static INLINE vfloat vnegpos_vf_vf(vfloat d) { return (vfloat)vxor_vm_vm_vm((vmask)d, (vmask)NPMASKf); } + +static INLINE vfloat vsubadd_vf_vf_vf(vfloat d0, vfloat d1) { return vadd_vf_vf_vf(d0, vnegpos_vf_vf(d1)); } +static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsubadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } + +static INLINE vfloat vrev21_vf_vf(vfloat d0) { return vrev64q_f32(d0); } +static INLINE vfloat vreva2_vf_vf(vfloat d0) { return vcombine_f32(vget_high_f32(d0), vget_low_f32(d0)); } +static INLINE vint2 vrev21_vi2_vi2(vint2 i) { return vreinterpret_vi2_vf(vrev21_vf_vf(vreinterpret_vf_vi2(i))); } + +static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { vstore_v_p_vf(ptr, v); } + +static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { + vst1_f32((float *)(ptr+(offset + step * 0)*2), vget_low_f32(v)); + vst1_f32((float *)(ptr+(offset + step * 1)*2), vget_high_f32(v)); +} + +static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, 
int step, vfloat v) { + vst1_f32((float *)(ptr+(offset + step * 0)*2), vget_low_f32(v)); + vst1_f32((float *)(ptr+(offset + step * 1)*2), vget_high_f32(v)); +} diff --git a/src/helperpower_128.h b/src/helperpower_128.h new file mode 100644 index 00000000..b5470b41 --- /dev/null +++ b/src/helperpower_128.h @@ -0,0 +1,790 @@ +// Copyright Naoki Shibata and contributors 2010 - 2020. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#if CONFIG == 1 || CONFIG == 2 + +#ifndef __VSX__ +#error Please specify -mcpu=power8 or -mcpu=power9 +#endif + +#else +#error CONFIG macro invalid or not defined +#endif + +#define ENABLE_DP +//@#define ENABLE_DP +#define LOG2VECTLENDP 1 +//@#define LOG2VECTLENDP 1 +#define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) + +#define ENABLE_SP +//@#define ENABLE_SP +#define LOG2VECTLENSP (LOG2VECTLENDP+1) +//@#define LOG2VECTLENSP (LOG2VECTLENDP+1) +#define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) + +#if CONFIG == 1 +#define ENABLE_FMA_DP +//@#define ENABLE_FMA_DP +#define ENABLE_FMA_SP +//@#define ENABLE_FMA_SP +#endif + +#define ACCURATE_SQRT +//@#define ACCURATE_SQRT +#define FULL_FP_ROUNDING +//@#define FULL_FP_ROUNDING + +#if !defined(SLEEF_GENHEADER) +#include +// undef altivec types since CPP and C99 use them as compiler tokens +// use __vector and __bool instead +#undef vector +#undef bool + +#include +#include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) + +#define ISANAME "VSX" +#define DFTPRIORITY 25 + +static INLINE int vavailability_i(int name) { return 3; } +static INLINE void vprefetch_v_p(const void *ptr) { } + +/********************************************** + ** Types +***********************************************/ +typedef __vector unsigned int vmask; +// using __bool with typedef may cause ambiguous errors +#define vopmask __vector __bool int +//@#define vopmask __vector __bool int +typedef __vector signed int vint; +typedef __vector signed int vint2; +typedef __vector float vfloat; +typedef __vector double vdouble; + +// internal use types +typedef __vector unsigned int v__u32; +typedef __vector unsigned char v__u8; +typedef __vector signed long long v__i64; +typedef __vector unsigned long long v__u64; +#define v__b64 __vector __bool long long + +/********************************************** + ** Utilities +***********************************************/ +#define vset__vi(v0, v1) ((vint) {v0, v1, v0, v1}) +#define vset__vi2(...) ((vint2) {__VA_ARGS__}) +#define vset__vm(...) ((vmask) {__VA_ARGS__}) +#define vset__vo(...) ((vopmask) {__VA_ARGS__}) +#define vset__vf(...) ((vfloat) {__VA_ARGS__}) +#define vset__vd(...) ((vdouble) {__VA_ARGS__}) +#define vset__u8(...) ((v__u8) {__VA_ARGS__}) +#define vset__u32(...) ((v__u32) {__VA_ARGS__}) +#define vset__s64(...) ((v__i64) {__VA_ARGS__}) +#define vset__u64(...) 
((v__u64) {__VA_ARGS__}) + +#define vsetall__vi(v) vset__vi(v, v) +#define vsetall__vi2(v) vset__vi2(v, v, v, v) +#define vsetall__vm(v) vset__vm(v, v, v, v) +#define vsetall__vo(v) vset__vo(v, v, v, v) +#define vsetall__vf(v) vset__vf(v, v, v, v) +#define vsetall__vd(v) vset__vd(v, v) +#define vsetall__u8(v) vset__u8(v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v) +#define vsetall__u32(v) vset__u32(v, v, v, v) +#define vsetall__s64(v) vset__s64(v, v) +#define vsetall__u64(v) vset__u64(v, v) + +#define vzero__vi() vsetall__vi(0) +#define vzero__vi2() vsetall__vi2(0) +#define vzero__vm() vsetall__vm(0) +#define vzero__vo() vsetall__vo(0) +#define vzero__vf() vsetall__vf(0) +#define vzero__vd() vsetall__vd(0) +#define vzero__u8() vsetall__u8(0) +#define vzero__u32() vsetall__u32(0) +#define vzero__s64() vsetall__s64(0) +#define vzero__u64() vsetall__u64(0) + +//// Swap doubleword elements +#ifdef __clang__ + static INLINE v__u64 v__swapd_u64(v__u64 v) + { return vec_xxpermdi(v, v, 2); } +#else + static INLINE v__u64 v__swapd_u64(v__u64 v) + { + __asm__ __volatile__("xxswapd %x0,%x1" : "=wa" (v) : "wa" (v)); + return v; + } +#endif + +/********************************************** + ** Memory +***********************************************/ + +////////////// Unaligned memory access ////////////// +/** + * It is not safe to use vector assignment via (cast & dereference) for unaligned memory access + * with almost all clang versions, or with GCC 8 when VSX3 isn't enabled: + * these compilers tend to generate 'lvx/stvx' instructions instead of 'lxvd2x/lxvw4x/stxvd2x/stxvw4x'. + * For more information, see https://github.com/seiko2plus/vsx_mem_test + * + * TODO: check GCC(9, 10) +*/ +//// load +#if defined(__POWER9_VECTOR__) || (!defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8) +static vint vloadu_vi_p(const int32_t *ptr) +{ return *((vint*)ptr); } +static INLINE vint2 vloadu_vi2_p(const int32_t *ptr) +{ return *((vint2*)ptr); } +static INLINE vfloat vloadu_vf_p(const float *ptr) +{ return *((vfloat*)ptr); } +static INLINE vdouble vloadu_vd_p(const double *ptr) +{ return *((vdouble*)ptr); } +#else +static vint vloadu_vi_p(const int32_t *ptr) +{ return vec_vsx_ld(0, ptr); } +static INLINE vint2 vloadu_vi2_p(const int32_t *ptr) +{ return vec_vsx_ld(0, ptr); } +static INLINE vfloat vloadu_vf_p(const float *ptr) +{ return vec_vsx_ld(0, ptr); } +static INLINE vdouble vloadu_vd_p(const double *ptr) +{ return vec_vsx_ld(0, ptr); } +#endif + +//// store +#if defined(__POWER9_VECTOR__) || (!defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8) +static void vstoreu_v_p_vi(int32_t *ptr, vint v) +{ *((vint*)ptr) = v; } +static void vstoreu_v_p_vi2(int32_t *ptr, vint2 v) +{ *((vint2*)ptr) = v; } +static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) +{ *((vfloat*)ptr) = v; } +static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) +{ *((vdouble*)ptr) = v; } +#else +static void vstoreu_v_p_vi(int32_t *ptr, vint v) +{ vec_vsx_st(v, 0, ptr); } +static void vstoreu_v_p_vi2(int32_t *ptr, vint2 v) +{ vec_vsx_st(v, 0, ptr); } +static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) +{ vec_vsx_st(v, 0, ptr); } +static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) +{ vec_vsx_st(v, 0, ptr); } +#endif + +////////////// aligned memory access ////////////// +//// load +static INLINE vfloat vload_vf_p(const float *ptr) +{ return vec_ld(0, ptr); } +static INLINE vdouble vload_vd_p(const double *ptr) +{ return *((vdouble*)ptr); } + +//// store +static INLINE void vstore_v_p_vf(float *ptr, vfloat v) +{
vec_st(v, 0, ptr); } +static INLINE void vstore_v_p_vd(double *ptr, vdouble v) +{ *((vdouble*)ptr) = v; } + +////////////// non-temporal memory access ////////////// +//// store +static INLINE void vstream_v_p_vf(float *ptr, vfloat v) +{ vstore_v_p_vf(ptr, v); } +static INLINE void vstream_v_p_vd(double *ptr, vdouble v) +{ vstore_v_p_vd(ptr, v); } + +////////////// LUT ////////////// +//// load +static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) +{ return vset__vd(ptr[vec_extract(vi, 0)], ptr[vec_extract(vi, 1)]); } + +static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) +{ + return vset__vf( + ptr[vec_extract(vi2, 0)], ptr[vec_extract(vi2, 1)], + ptr[vec_extract(vi2, 2)], ptr[vec_extract(vi2, 3)] + ); +} + +//// store +static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) +{ + const v__u64 vll = (v__u64)v; + float *ptr_low = ptr + offset*2; + float *ptr_high = ptr + (offset + step)*2; + *((uint64_t*)ptr_low) = vec_extract(vll, 0); + *((uint64_t*)ptr_high) = vec_extract(vll, 1); +} + +static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) +{ vscatter2_v_p_i_i_vf(ptr, offset, step, v); } + +static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) +{ vstore_v_p_vd((double *)(&ptr[2*offset]), v); } + +static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) +{ vscatter2_v_p_i_i_vd(ptr, offset, step, v); } + +/********************************************** + ** Misc + **********************************************/ + +// vector with a specific value set to all lanes (Vector Splat) +static INLINE vint vcast_vi_i(int i) +{ return vsetall__vi(i); } +static INLINE vint2 vcast_vi2_i(int i) +{ return vsetall__vi2(i); } +static INLINE vfloat vcast_vf_f(float f) +{ return vsetall__vf(f); } +static INLINE vdouble vcast_vd_d(double d) +{ return vsetall__vd(d); } +// cast +static INLINE vint2 vcast_vi2_vm(vmask vm) +{ return (vint2)vm; } +static INLINE vmask vcast_vm_vi2(vint2 vi) +{ return (vmask)vi; } +// get the first element +static INLINE float vcast_f_vf(vfloat v) +{ return vec_extract(v, 0); } +static INLINE double vcast_d_vd(vdouble v) +{ return vec_extract(v, 0); } + +static INLINE vmask vreinterpret_vm_vd(vdouble vd) +{ return (vmask)vd; } +static INLINE vdouble vreinterpret_vd_vm(vmask vm) +{ return (vdouble)vm; } +static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) +{ return (vint2)vd; } +static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) +{ return (vdouble)vi; } + +static INLINE vmask vreinterpret_vm_vf(vfloat vf) +{ return (vmask)vf; } +static INLINE vfloat vreinterpret_vf_vm(vmask vm) +{ return (vfloat)vm; } +static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) +{ return (vfloat)vi; } +static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) +{ return (vint2)vf; } + +// per element select via mask (blend) +static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, vdouble x, vdouble y) +{ return vec_sel(y, x, (v__b64)o); } +static INLINE vfloat vsel_vf_vo_vf_vf(vopmask o, vfloat x, vfloat y) +{ return vec_sel(y, x, o); } + +static INLINE vint vsel_vi_vo_vi_vi(vopmask o, vint x, vint y) +{ return vec_sel(y, x, o); } + +static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask o, vint2 x, vint2 y) +{ return vec_sel(y, x, o); } + +static INLINE vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) +{ + return vsel_vf_vo_vf_vf(o, vsetall__vf(v1), vsetall__vf(v0)); +} +static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) +{ + return 
vsel_vf_vo_vf_vf(o0, vsetall__vf(d0), vsel_vf_vo_f_f(o1, d1, d2)); +} +static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) +{ + return vsel_vf_vo_vf_vf(o0, vsetall__vf(d0), vsel_vf_vo_vf_vf(o1, vsetall__vf(d1), vsel_vf_vo_f_f(o2, d2, d3))); +} + +static INLINE vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) +{ + return vsel_vd_vo_vd_vd(o, vsetall__vd(v1), vsetall__vd(v0)); +} +static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) +{ + return vsel_vd_vo_vd_vd(o0, vsetall__vd(d0), vsel_vd_vo_d_d(o1, d1, d2)); +} +static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) +{ + return vsel_vd_vo_vd_vd(o0, vsetall__vd(d0), vsel_vd_vo_vd_vd(o1, vsetall__vd(d1), vsel_vd_vo_d_d(o2, d2, d3))); +} + +static INLINE int vtestallones_i_vo32(vopmask g) +{ return vec_all_ne((vint2)g, vzero__vi2()); } +static INLINE int vtestallones_i_vo64(vopmask g) +{ return vec_all_ne((v__i64)g, vzero__s64()); } + +/********************************************** + ** Conversions + **********************************************/ + +////////////// Numeric ////////////// +// pack 64-bit mask to 32-bit +static INLINE vopmask vcast_vo32_vo64(vopmask m) +{ return (vopmask)vec_pack((v__u64)m, (v__u64)m); } +// clip 64-bit lanes to lower 32-bit +static INLINE vint vcastu_vi_vi2(vint2 vi2) +{ return vec_mergeo(vi2, vec_splat(vi2, 3)); } + +// expand lower 32-bit mask +static INLINE vopmask vcast_vo64_vo32(vopmask m) +{ return vec_mergeh(m, m); } +// unsigned expand lower 32-bit integer +static INLINE vint2 vcastu_vi2_vi(vint vi) +{ return vec_mergeh(vzero__vi(), vi); } + +// signed int to single-precision +static INLINE vfloat vcast_vf_vi2(vint2 vi) +{ + vfloat ret; +#ifdef __clang__ + ret = __builtin_convertvector(vi, vfloat); +#else + __asm__ __volatile__("xvcvsxwsp %x0,%x1" : "=wa" (ret) : "wa" (vi)); +#endif + return ret; +} + +// lower signed int to double-precision +static INLINE vdouble vcast_vd_vi(vint vi) +{ + vdouble ret; + vint swap = vec_mergeh(vi, vi); +#ifdef __clang__ + ret = __builtin_vsx_xvcvsxwdp(swap); +#else + __asm__ __volatile__("xvcvsxwdp %x0,%x1" : "=wa" (ret) : "wa" (swap)); +#endif + return ret; +} + +// zip two scalars +static INLINE vmask vcast_vm_i_i(int l, int h) +{ return (vmask)vec_mergeh(vsetall__vi2(h), vsetall__vi2(l)); } + +////////////// Truncation ////////////// + +static INLINE vint2 vtruncate_vi2_vf(vfloat vf) +{ + vint2 ret; +#ifdef __clang__ + ret = __builtin_convertvector(vf, vint2); +#else + __asm__ __volatile__("xvcvspsxws %x0,%x1" : "=wa" (ret) : "wa" (vf)); +#endif + return ret; +} + +static INLINE vint vtruncate_vi_vd(vdouble vd) +{ + vint ret; +#ifdef __clang__ + ret = __builtin_vsx_xvcvdpsxws(vd); +#else + __asm__ __volatile__("xvcvdpsxws %x0,%x1" : "=wa" (ret) : "wa" (vd)); +#endif + return vec_mergeo(ret, vec_splat(ret, 3)); +} + +static INLINE vdouble vtruncate_vd_vd(vdouble vd) +{ return vec_trunc(vd); } +static INLINE vfloat vtruncate_vf_vf(vfloat vf) +{ return vec_trunc(vf); } + +////////////// Rounding ////////////// + +// towards the nearest even +static INLINE vint vrint_vi_vd(vdouble vd) +{ return vtruncate_vi_vd(vec_rint(vd)); } +static INLINE vint2 vrint_vi2_vf(vfloat vf) +{ return vtruncate_vi2_vf(vec_rint(vf)); } +static INLINE vdouble vrint_vd_vd(vdouble vd) +{ return vec_rint(vd); } +static INLINE vfloat vrint_vf_vf(vfloat vf) +{ return vec_rint(vf); } + 
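The conversion and rounding wrappers above encode two distinct contracts: vtruncate_* rounds toward zero (vec_trunc, xvcvdpsxws/xvcvspsxws), while vrint_* rounds to the nearest integer with ties to even (vec_rint), and the integer-returning vrint_vi_vd is composed as truncate(rint(x)), which is exact because rint(x) is already integral. A minimal scalar sketch of those semantics, not part of the patch, assuming the default FP rounding mode (compile with -lm):

#include <math.h>
#include <stdio.h>

int main(void) {
  const double xs[] = { 2.5, 3.5, -2.5, 2.7 };
  for (int i = 0; i < 4; i++) {
    double x = xs[i];
    /* trunc(): toward zero; rint(): nearest, ties to even in the default
       mode -- the scalar analogues of vtruncate_vd_vd and vrint_vd_vd */
    printf("x=% .1f  trunc=% .0f  rint=% .0f\n", x, trunc(x), rint(x));
  }
  return 0; /* note: 2.5 -> 2 but 3.5 -> 4 (ties to even, unlike round()) */
}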
+/********************************************** + ** Logical + **********************************************/ + +////////////// And ////////////// +static INLINE vint vand_vi_vi_vi(vint x, vint y) +{ return vec_and(x, y); } +static INLINE vint vand_vi_vo_vi(vopmask x, vint y) +{ return vec_and((vint)x, y); } +static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) +{ return vec_and(x, y); } +static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) +{ return (vint2)vec_and((vint2)x, y); } + +static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) +{ return vec_and(x, y); } +static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) +{ return vec_and((vmask)x, y); } +static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) +{ return vec_and((vmask)x, y); } +static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) +{ return vec_and(x, y); } + +////////////// Or ////////////// +static INLINE vint vor_vi_vi_vi(vint x, vint y) +{ return vec_or(x, y); } +static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) +{ return vec_or(x, y); } + +static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) +{ return vec_or(x, y); } +static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) +{ return vec_or((vmask)x, y); } +static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) +{ return vec_or((vmask)x, y); } +static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) +{ return vec_or(x, y); } + +////////////// Xor ////////////// +static INLINE vint vxor_vi_vi_vi(vint x, vint y) +{ return vec_xor(x, y); } +static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) +{ return vec_xor(x, y); } + +static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) +{ return vec_xor(x, y); } +static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) +{ return vec_xor((vmask)x, y); } +static INLINE vmask vxor_vm_vo64_vm(vopmask x, vmask y) +{ return vec_xor((vmask)x, y); } +static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) +{ return vec_xor(x, y); } + +////////////// Not ////////////// +static INLINE vopmask vnot_vo_vo(vopmask o) +{ return vec_nor(o, o); } + +////////////// And Not ((~x) & y) ////////////// +static INLINE vint vandnot_vi_vi_vi(vint x, vint y) +{ return vec_andc(y, x); } +static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) +{ return vec_andc(y, (vint)x); } +static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) +{ return vec_andc(y, x); } +static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) +{ return vec_andc(y, x); } +static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) +{ return vec_andc(y, x); } +static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) +{ return vec_andc(y, x); } +static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) +{ return vec_andc(y, x); } +static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) +{ return vec_andc(y, (vint2)x); } + +/********************************************** + ** Comparison + **********************************************/ + +////////////// Equal ////////////// +static INLINE vint veq_vi_vi_vi(vint x, vint y) +{ return (vint)vec_cmpeq(x, y); } +static INLINE vopmask veq_vo_vi_vi(vint x, vint y) +{ return vec_cmpeq(x, y); } + +static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) +{ return vec_cmpeq(x, y); } +static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) +{ return (vint2)vec_cmpeq(x, y); } + +static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) +{ return (vopmask)vec_cmpeq((v__u64)x, (v__u64)y); } + +static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) +{ return vec_cmpeq(x, y); } +static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) +{ return 
(vopmask)vec_cmpeq(x, y); } + +////////////// Not Equal ////////////// +static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) +{ return vnot_vo_vo(vec_cmpeq(x, y)); } +static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) +{ return vnot_vo_vo((vopmask)vec_cmpeq(x, y)); } + +////////////// Less Than ////////////// +static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) +{ return vec_cmplt(x, y); } +static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) +{ return (vopmask)vec_cmplt(x, y); } + +////////////// Greater Than ////////////// +static INLINE vint vgt_vi_vi_vi(vint x, vint y) +{ return (vint)vec_cmpgt(x, y); } +static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) +{ return vec_cmpgt(x, y);} + +static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) +{ return (vint2)vec_cmpgt(x, y); } +static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) +{ return vec_cmpgt(x, y); } + +static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) +{ return vec_cmpgt(x, y); } +static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) +{ return (vopmask)vec_cmpgt(x, y); } + +////////////// Less Than Or Equal ////////////// +static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) +{ return vec_cmple(x, y); } +static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) +{ return (vopmask)vec_cmple(x, y); } + +////////////// Greater Than Or Equal ////////////// +static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) +{ return vec_cmpge(x, y); } +static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) +{ return (vopmask)vec_cmpge(x, y); } + +////////////// Special Cases ////////////// +static INLINE vopmask visinf_vo_vf(vfloat d) +{ return vec_cmpeq(vec_abs(d), vsetall__vf(SLEEF_INFINITYf)); } +static INLINE vopmask visinf_vo_vd(vdouble d) +{ return (vopmask)vec_cmpeq(vec_abs(d), vsetall__vd(SLEEF_INFINITY)); } + +static INLINE vopmask vispinf_vo_vf(vfloat d) +{ return vec_cmpeq(d, vsetall__vf(SLEEF_INFINITYf)); } +static INLINE vopmask vispinf_vo_vd(vdouble d) +{ return (vopmask)vec_cmpeq(d, vsetall__vd(SLEEF_INFINITY)); } + +static INLINE vopmask visminf_vo_vf(vfloat d) +{ return vec_cmpeq(d, vsetall__vf(-SLEEF_INFINITYf)); } +static INLINE vopmask visminf_vo_vd(vdouble d) +{ return (vopmask)vec_cmpeq(d, vsetall__vd(-SLEEF_INFINITY)); } + +static INLINE vopmask visnan_vo_vf(vfloat d) +{ return vnot_vo_vo(vec_cmpeq(d, d)); } +static INLINE vopmask visnan_vo_vd(vdouble d) +{ return vnot_vo_vo((vopmask)vec_cmpeq(d, d)); } + +/********************************************** + ** Shift + **********************************************/ +////////////// Left ////////////// +static INLINE vint vsll_vi_vi_i(vint x, int c) +{ return vec_sl (x, vsetall__u32(c)); } +static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) +{ return vec_sl(x, vsetall__u32(c)); } + +////////////// Right ////////////// +static INLINE vint vsrl_vi_vi_i(vint x, int c) +{ return vec_sr(x, vsetall__u32(c)); } +static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) +{ return vec_sr(x, vsetall__u32(c)); } + +////////////// Algebraic Right ////////////// +static INLINE vint vsra_vi_vi_i(vint x, int c) +{ return vec_sra(x, vsetall__u32(c)); } +static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) +{ return vec_sra(x, vsetall__u32(c)); } + +/********************************************** + ** Reorder + **********************************************/ + +////////////// Reverse ////////////// +// Reverse elements order inside the lower and higher parts +static INLINE vint2 vrev21_vi2_vi2(vint2 vi) +{ return vec_mergee(vec_mergeo(vi, vi), vi); } +static INLINE vfloat 
vrev21_vf_vf(vfloat vf) +{ return (vfloat)vrev21_vi2_vi2((vint2)vf); } + +// Swap the lower and higher parts +static INLINE vfloat vreva2_vf_vf(vfloat vf) +{ return (vfloat)v__swapd_u64((v__u64)vf); } +static INLINE vdouble vrev21_vd_vd(vdouble vd) +{ return (vdouble)v__swapd_u64((v__u64)vd); } +static INLINE vdouble vreva2_vd_vd(vdouble vd) +{ return vd; } + +/********************************************** + ** Arithmetic + **********************************************/ + +////////////// Negation ////////////// +static INLINE vint vneg_vi_vi(vint e) { +#ifdef __clang__ + return vec_neg(e); +#else + return vec_sub(vzero__vi(), e); +#endif +} +static INLINE vint2 vneg_vi2_vi2(vint2 e) +{ return vneg_vi_vi(e); } + +static INLINE vfloat vneg_vf_vf(vfloat d) +{ + vfloat ret; +#ifdef __clang__ + ret = vec_neg(d); +#else + __asm__ __volatile__("xvnegsp %x0,%x1" : "=wa" (ret) : "wa" (d)); +#endif + return ret; +} + +static INLINE vdouble vneg_vd_vd(vdouble d) +{ + vdouble ret; +#ifdef __clang__ + ret = vec_neg(d); +#else + __asm__ __volatile__("xvnegdp %x0,%x1" : "=wa" (ret) : "wa" (d)); +#endif + return ret; +} + +static INLINE vfloat vposneg_vf_vf(vfloat d) +{ return vec_xor(d, vset__vf(+0.0f, -0.0f, +0.0f, -0.0f)); } +static INLINE vdouble vposneg_vd_vd(vdouble d) +{ return vec_xor(d, vset__vd(+0.0, -0.0)); } + +static INLINE vfloat vnegpos_vf_vf(vfloat d) +{ return vec_xor(d, vset__vf(-0.0f, +0.0f, -0.0f, +0.0f)); } +static INLINE vdouble vnegpos_vd_vd(vdouble d) +{ return vec_xor(d, vset__vd(-0.0, +0.0)); } + +////////////// Addition ////////////// +static INLINE vint vadd_vi_vi_vi(vint x, vint y) +{ return vec_add(x, y); } +static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) +{ return vec_add(x, y); } + +static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) +{ return vec_add(x, y); } +static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) +{ return vec_add(x, y); } + +static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) +{ return (vmask)vec_add((v__i64)x, (v__i64)y); } + +////////////// Subtraction ////////////// +static INLINE vint vsub_vi_vi_vi(vint x, vint y) +{ return vec_sub(x, y); } +static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) +{ return vec_sub(x, y); } + +static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) +{ return vec_sub(x, y); } +static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) +{ return vec_sub(x, y); } + +static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) +{ return vec_add(x, vnegpos_vd_vd(y)); } +static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) +{ return vec_add(x, vnegpos_vf_vf(y)); } + +////////////// Multiplication ////////////// +static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) +{ return vec_mul(x, y); } +static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) +{ return vec_mul(x, y); } + +static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) +{ return vec_div(x, y); } +static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) +{ return vec_div(x, y); } + +static INLINE vfloat vrec_vf_vf(vfloat x) +{ return vec_div(vsetall__vf(1.0f), x); } +static INLINE vdouble vrec_vd_vd(vdouble x) +{ return vec_div(vsetall__vd(1.0), x); } + +/********************************************** + ** Math + **********************************************/ + +static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) +{ return vec_max(x, y); } +static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) +{ return vec_max(x, y); } + +static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) +{ return vec_min(x, y); } +static INLINE vdouble 
vmin_vd_vd_vd(vdouble x, vdouble y) +{ return vec_min(x, y); } + +static INLINE vfloat vabs_vf_vf(vfloat f) +{ return vec_abs(f); } +static INLINE vdouble vabs_vd_vd(vdouble d) +{ return vec_abs(d); } + +static INLINE vfloat vsqrt_vf_vf(vfloat f) +{ return vec_sqrt(f); } +static INLINE vdouble vsqrt_vd_vd(vdouble d) +{ return vec_sqrt(d); } + + +/********************************************** + ** FMA3 + **********************************************/ +#if CONFIG == 1 + +static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) +{ return vec_madd(x, y, z); } +static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) +{ return vec_madd(x, y, z); } + +static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) +{ return vec_msub(x, y, z); } +static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) +{ return vec_msub(x, y, z); } + +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) +{ return vec_nmsub(x, y, z); } +static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) +{ return vec_nmsub(x, y, z); } + +#else + +static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) +{ return vec_add(vec_mul(x, y), z); } +static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) +{ return vec_add(vec_mul(x, y), z); } + +static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) +{ return vec_sub(vec_mul(x, y), z); } +static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) +{ return vec_sub(vec_mul(x, y), z); } + +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) +{ return vec_sub(z, vec_mul(x, y)); } +static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) +{ return vec_sub(z, vec_mul(x, y)); } + +#endif + +static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) +{ return vec_madd(x, y, z); } +static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) +{ return vec_madd(x, y, z); } +static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) +{ return vec_madd(x, y, z); } +static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) +{ return vec_madd(x, y, z); } + +static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) +{ return vec_msub(x, y, z); } +static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) +{ return vec_msub(x, y, z); } + +static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) +{ return vec_nmsub(x, y, z); } +static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) +{ return vec_nmsub(x, y, z); } + +static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) +{ return vec_nmadd(x, y, z); } +static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) +{ return vec_nmadd(x, y, z); } + +static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) +{ return vmla_vf_vf_vf_vf(x, y, vnegpos_vf_vf(z)); } +static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) +{ return vmla_vd_vd_vd_vd(x, y, vnegpos_vd_vd(z)); } diff --git a/src/helpersse2.h b/src/helpersse2.h new file mode 100644 index 00000000..99877ab2 --- /dev/null +++ b/src/helpersse2.h @@ -0,0 +1,530 @@ +// Copyright Naoki Shibata and contributors 2010 - 2020. +// Distributed under the Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#if CONFIG == 2 + +#if !defined(__SSE2__) && !defined(SLEEF_GENHEADER) +#error Please specify -msse2. +#endif + +#elif CONFIG == 3 + +#if (!defined(__SSE2__) || !defined(__SSE3__)) && !defined(SLEEF_GENHEADER) +#error Please specify -msse2 and -msse3 +#endif + +#elif CONFIG == 4 + +#if (!defined(__SSE2__) || !defined(__SSE3__) || !defined(__SSE4_1__)) && !defined(SLEEF_GENHEADER) +#error Please specify -msse2, -msse3 and -msse4.1 +#endif + +#else +#error CONFIG macro invalid or not defined +#endif + +#define ENABLE_DP +//@#define ENABLE_DP +#define LOG2VECTLENDP 1 +//@#define LOG2VECTLENDP 1 +#define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) + +#define ENABLE_SP +//@#define ENABLE_SP +#define LOG2VECTLENSP (LOG2VECTLENDP+1) +//@#define LOG2VECTLENSP (LOG2VECTLENDP+1) +#define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) + +#define ACCURATE_SQRT +//@#define ACCURATE_SQRT + +#if !defined(SLEEF_GENHEADER) +#if defined(_MSC_VER) +#include <intrin.h> +#else +#include <x86intrin.h> +#endif + +#include <stdint.h> +#include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) + +typedef __m128i vmask; +typedef __m128i vopmask; + +typedef __m128d vdouble; +typedef __m128i vint; + +typedef __m128 vfloat; +typedef __m128i vint2; + +typedef struct { + vmask x, y; +} vmask2; + +// + +#if !defined(SLEEF_GENHEADER) + +#ifndef __SLEEF_H__ +static inline void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx) { + /* We don't do real cpuid detection here; report every feature as present */ + out[0] = 0xFFFFFFFF; + out[1] = 0xFFFFFFFF; + out[2] = 0xFFFFFFFF; + out[3] = 0xFFFFFFFF; +} +#endif + +static INLINE int cpuSupportsSSE2() { + int32_t reg[4]; + Sleef_x86CpuID(reg, 1, 0); + return (reg[3] & (1 << 26)) != 0; +} + +static INLINE int cpuSupportsSSE3() { + int32_t reg[4]; + Sleef_x86CpuID(reg, 1, 0); + return (reg[2] & (1 << 0)) != 0; +} + +static INLINE int cpuSupportsSSE4_1() { + int32_t reg[4]; + Sleef_x86CpuID(reg, 1, 0); + return (reg[2] & (1 << 19)) != 0; +} + +#if defined(__SSE2__) && defined(__SSE3__) && defined(__SSE4_1__) +static INLINE int vavailability_i(int name) { + //int d = __builtin_cpu_supports("sse2") && __builtin_cpu_supports("sse3") && __builtin_cpu_supports("sse4.1"); + int d = cpuSupportsSSE2() && cpuSupportsSSE3() && cpuSupportsSSE4_1(); + return d ? 3 : 0; +} +#define ISANAME "SSE4.1" +#define DFTPRIORITY 12 +#elif defined(__SSE2__) && defined(__SSE3__) +static INLINE int vavailability_i(int name) { + //int d = __builtin_cpu_supports("sse2") && __builtin_cpu_supports("sse3"); + int d = cpuSupportsSSE2() && cpuSupportsSSE3(); + return d ? 3 : 0; +} +#define ISANAME "SSE3" +#define DFTPRIORITY 11 +#else +static INLINE int vavailability_i(int name) { + int d = cpuSupportsSSE2(); + return d ?
3 : 0; +} +#define ISANAME "SSE2" +#define DFTPRIORITY 10 +#endif + +#endif // #if !defined(SLEEF_GENHEADER) + +static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } + +static INLINE int vtestallones_i_vo32(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; } +static INLINE int vtestallones_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; } + +// + +static vint2 vloadu_vi2_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); } +static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm_storeu_si128((__m128i *)p, v); } + +static vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); } +static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); } + +// + +static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return _mm_and_si128(x, y); } +static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return _mm_andnot_si128(x, y); } +static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return _mm_or_si128(x, y); } +static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return _mm_xor_si128(x, y); } + +static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return _mm_and_si128(x, y); } +static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return _mm_andnot_si128(x, y); } +static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return _mm_or_si128(x, y); } +static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return _mm_xor_si128(x, y); } + +static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return _mm_and_si128(x, y); } +static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return _mm_or_si128(x, y); } +static INLINE vmask vandnot_vm_vo64_vm(vmask x, vmask y) { return _mm_andnot_si128(x, y); } +static INLINE vmask vxor_vm_vo64_vm(vmask x, vmask y) { return _mm_xor_si128(x, y); } + +static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return _mm_and_si128(x, y); } +static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return _mm_or_si128(x, y); } +static INLINE vmask vandnot_vm_vo32_vm(vmask x, vmask y) { return _mm_andnot_si128(x, y); } +static INLINE vmask vxor_vm_vo32_vm(vmask x, vmask y) { return _mm_xor_si128(x, y); } + +static INLINE vopmask vcast_vo32_vo64(vopmask m) { return _mm_shuffle_epi32(m, 0x08); } +static INLINE vopmask vcast_vo64_vo32(vopmask m) { return _mm_shuffle_epi32(m, 0x50); } + +// + +static INLINE vint vrint_vi_vd(vdouble vd) { return _mm_cvtpd_epi32(vd); } +static INLINE vint vtruncate_vi_vd(vdouble vd) { return _mm_cvttpd_epi32(vd); } +static INLINE vdouble vcast_vd_vi(vint vi) { return _mm_cvtepi32_pd(vi); } +static INLINE vint vcast_vi_i(int i) { return _mm_set_epi32(0, 0, i, i); } +static INLINE vint2 vcastu_vi2_vi(vint vi) { return _mm_and_si128(_mm_shuffle_epi32(vi, 0x73), _mm_set_epi32(-1, 0, -1, 0)); } +static INLINE vint vcastu_vi_vi2(vint2 vi) { return _mm_shuffle_epi32(vi, 0x0d); } + +#if CONFIG == 4 +static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return _mm_round_pd(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } +static INLINE vdouble vrint_vd_vd(vdouble vd) { return _mm_round_pd(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } +static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return _mm_round_ps(vf, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } +static INLINE vfloat vrint_vf_vf(vfloat vd) { return _mm_round_ps(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } +static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return _mm_cmpeq_epi64(x, y); } +#define FULL_FP_ROUNDING +//@#define FULL_FP_ROUNDING +#else +static INLINE vdouble 
vtruncate_vd_vd(vdouble vd) { return vcast_vd_vi(vtruncate_vi_vd(vd)); } +static INLINE vdouble vrint_vd_vd(vdouble vd) { return vcast_vd_vi(vrint_vi_vd(vd)); } +static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { + vmask t = _mm_cmpeq_epi32(x, y); + return vand_vm_vm_vm(t, _mm_shuffle_epi32(t, 0xb1)); +} +#endif + +static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return _mm_add_epi64(x, y); } + +static INLINE vmask vcast_vm_i_i(int i0, int i1) { return _mm_set_epi32(i0, i1, i0, i1); } + +// + +static INLINE vdouble vcast_vd_d(double d) { return _mm_set1_pd(d); } +static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return _mm_castpd_si128(vd); } +static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { return _mm_castpd_si128(vd); } +static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) { return _mm_castsi128_pd(vi); } +static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return _mm_castsi128_pd(vm); } + +static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm_add_pd(x, y); } +static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return _mm_sub_pd(x, y); } +static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm_mul_pd(x, y); } +static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return _mm_div_pd(x, y); } +static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm_div_pd(_mm_set1_pd(1), x); } +static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm_sqrt_pd(x); } +static INLINE vdouble vabs_vd_vd(vdouble d) { return _mm_andnot_pd(_mm_set1_pd(-0.0), d); } +static INLINE vdouble vneg_vd_vd(vdouble d) { return _mm_xor_pd(_mm_set1_pd(-0.0), d); } +static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } +static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } +static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(z, vmul_vd_vd_vd(x, y)); } +static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm_max_pd(x, y); } +static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm_min_pd(x, y); } + +static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return _mm_castpd_si128(_mm_cmpeq_pd(x, y)); } +static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return _mm_castpd_si128(_mm_cmpneq_pd(x, y)); } +static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return _mm_castpd_si128(_mm_cmplt_pd(x, y)); } +static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return _mm_castpd_si128(_mm_cmple_pd(x, y)); } +static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return _mm_castpd_si128(_mm_cmpgt_pd(x, y)); } +static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return _mm_castpd_si128(_mm_cmpge_pd(x, y)); } + +static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm_add_epi32(x, y); } +static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm_sub_epi32(x, y); } +static INLINE vint vneg_vi_vi(vint e) { return vsub_vi_vi_vi(vcast_vi_i(0), e); } + +static INLINE vint vand_vi_vi_vi(vint x, vint y) { return _mm_and_si128(x, y); } +static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm_andnot_si128(x, y); } +static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm_or_si128(x, y); } +static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm_xor_si128(x, y); } + +static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { return _mm_and_si128(x, y); } +static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { return 
_mm_andnot_si128(x, y); } + +static INLINE vint vsll_vi_vi_i(vint x, int c) { return _mm_slli_epi32(x, c); } +static INLINE vint vsrl_vi_vi_i(vint x, int c) { return _mm_srli_epi32(x, c); } +static INLINE vint vsra_vi_vi_i(vint x, int c) { return _mm_srai_epi32(x, c); } + +static INLINE vint veq_vi_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); } +static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); } + +static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); } +static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); } + +#if CONFIG == 4 +static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return _mm_blendv_epi8(y, x, m); } + +static INLINE vdouble vsel_vd_vo_vd_vd(vopmask m, vdouble x, vdouble y) { return _mm_blendv_pd(y, x, _mm_castsi128_pd(m)); } +#else +static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return vor_vm_vm_vm(vand_vm_vm_vm(m, x), vandnot_vm_vm_vm(m, y)); } + +static INLINE vdouble vsel_vd_vo_vd_vd(vopmask opmask, vdouble x, vdouble y) { + return _mm_or_pd(_mm_and_pd(_mm_castsi128_pd(opmask), x), _mm_andnot_pd(_mm_castsi128_pd(opmask), y)); +} +#endif + +static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { + return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0)); +} + +static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { + return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2)); +} + +static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { + return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3))); +} + +static INLINE vopmask visinf_vo_vd(vdouble d) { + return vreinterpret_vm_vd(_mm_cmpeq_pd(vabs_vd_vd(d), _mm_set1_pd(SLEEF_INFINITY))); +} + +static INLINE vopmask vispinf_vo_vd(vdouble d) { + return vreinterpret_vm_vd(_mm_cmpeq_pd(d, _mm_set1_pd(SLEEF_INFINITY))); +} + +static INLINE vopmask visminf_vo_vd(vdouble d) { + return vreinterpret_vm_vd(_mm_cmpeq_pd(d, _mm_set1_pd(-SLEEF_INFINITY))); +} + +static INLINE vopmask visnan_vo_vd(vdouble d) { + return vreinterpret_vm_vd(_mm_cmpneq_pd(d, d)); +} + +// + +static INLINE vdouble vload_vd_p(const double *ptr) { return _mm_load_pd(ptr); } +static INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm_loadu_pd(ptr); } + +static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { _mm_store_pd(ptr, v); } +static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { _mm_storeu_pd(ptr, v); } + +static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { + int a[sizeof(vint)/sizeof(int)]; + vstoreu_v_p_vi(a, vi); + return _mm_set_pd(ptr[a[1]], ptr[a[0]]); +} + +// This function is for debugging +static INLINE double vcast_d_vd(vdouble v) { + double a[VECTLENDP]; + vstoreu_v_p_vd(a, v); + return a[0]; +} + +// + +static INLINE vint2 vcast_vi2_vm(vmask vm) { return vm; } +static INLINE vmask vcast_vm_vi2(vint2 vi) { return vi; } +static INLINE vint2 vrint_vi2_vf(vfloat vf) { return _mm_cvtps_epi32(vf); } +static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return _mm_cvttps_epi32(vf); } +static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm_cvtepi32_ps(vcast_vm_vi2(vi)); } +static INLINE vfloat vcast_vf_f(float f) { return _mm_set1_ps(f); } +static INLINE vint2 vcast_vi2_i(int i) { return _mm_set1_epi32(i); } +static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return _mm_castps_si128(vf); } 
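When SSE4.1 is unavailable (the #else branches of the CONFIG == 4 blocks above), the vsel_* fallbacks synthesize a per-lane select from three bitwise operations instead of a blendv instruction. A minimal scalar sketch of the identity they rely on, not part of the patch; it works because SIMD comparisons produce all-ones or all-zeros in each lane:

#include <stdint.h>
#include <stdio.h>

/* (m & x) | (~m & y): yields x where the mask is all-ones and y where it is
   all-zeros -- the same lane-wise result _mm_blendv_* computes in one
   instruction on SSE4.1 when fed saturated comparison masks */
static uint32_t sel(uint32_t m, uint32_t x, uint32_t y) {
  return (m & x) | (~m & y);
}

int main(void) {
  printf("%u %u\n", sel(0xFFFFFFFFu, 7u, 9u), sel(0u, 7u, 9u)); /* 7 9 */
  return 0;
}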
+static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return _mm_castsi128_ps(vm); } +static INLINE vfloat vreinterpret_vf_vi2(vint2 vm) { return _mm_castsi128_ps(vm); } +static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return _mm_castps_si128(vf); } + +#if CONFIG != 4 +static INLINE vfloat vtruncate_vf_vf(vfloat vd) { return vcast_vf_vi2(vtruncate_vi2_vf(vd)); } +static INLINE vfloat vrint_vf_vf(vfloat vf) { return vcast_vf_vi2(vrint_vi2_vf(vf)); } +#endif + +static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return _mm_add_ps(x, y); } +static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm_sub_ps(x, y); } +static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm_mul_ps(x, y); } +static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm_div_ps(x, y); } +static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); } +static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm_sqrt_ps(x); } +static INLINE vfloat vabs_vf_vf(vfloat f) { return vreinterpret_vf_vm(vandnot_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))); } +static INLINE vfloat vneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(d))); } +static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } +static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); } +static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm_max_ps(x, y); } +static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm_min_ps(x, y); } + +static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmpeq_ps(x, y)); } +static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmpneq_ps(x, y)); } +static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmplt_ps(x, y)); } +static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmple_ps(x, y)); } +static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmpgt_ps(x, y)); } +static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmpge_ps(x, y)); } + +static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return vadd_vi_vi_vi(x, y); } +static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return vsub_vi_vi_vi(x, y); } +static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vsub_vi2_vi2_vi2(vcast_vi2_i(0), e); } + +static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return vand_vi_vi_vi(x, y); } +static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return vandnot_vi_vi_vi(x, y); } +static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return vor_vi_vi_vi(x, y); } +static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return vxor_vi_vi_vi(x, y); } + +static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return vand_vi_vo_vi(x, y); } +static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return vandnot_vi_vo_vi(x, y); } + +static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { return vsll_vi_vi_i(x, c); } +static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { return vsrl_vi_vi_i(x, c); } +static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { return vsra_vi_vi_i(x, c); } + +static INLINE vopmask 
veq_vo_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpeq_epi32(x, y); } +static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpgt_epi32(x, y); } +static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpeq_epi32(x, y); } +static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpgt_epi32(x, y); } + +#if CONFIG == 4 +static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { return _mm_blendv_epi8(y, x, m); } + +static INLINE vfloat vsel_vf_vo_vf_vf(vopmask m, vfloat x, vfloat y) { return _mm_blendv_ps(y, x, _mm_castsi128_ps(m)); } +#else +static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { + return vor_vi2_vi2_vi2(vand_vi2_vi2_vi2(m, x), vandnot_vi2_vi2_vi2(m, y)); +} + +static INLINE vfloat vsel_vf_vo_vf_vf(vopmask opmask, vfloat x, vfloat y) { + return _mm_or_ps(_mm_and_ps(_mm_castsi128_ps(opmask), x), _mm_andnot_ps(_mm_castsi128_ps(opmask), y)); +} +#endif + +static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { + return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); +} + +static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { + return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); +} + +static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { + return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); +} + +static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); } +static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); } +static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); } +static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } + +static INLINE vfloat vload_vf_p(const float *ptr) { return _mm_load_ps(ptr); } +static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm_loadu_ps(ptr); } + +static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { _mm_store_ps(ptr, v); } +static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { _mm_storeu_ps(ptr, v); } + +static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi) { + int a[VECTLENSP]; + vstoreu_v_p_vi2(a, vi); + return _mm_set_ps(ptr[a[3]], ptr[a[2]], ptr[a[1]], ptr[a[0]]); +} + +// This function is for debugging +static INLINE float vcast_f_vf(vfloat v) { + float a[VECTLENSP]; + vstoreu_v_p_vf(a, v); + return a[0]; +} + +// + +#define PNMASK ((vdouble) { +0.0, -0.0 }) +#define NPMASK ((vdouble) { -0.0, +0.0 }) +#define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f }) +#define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f }) + +static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } +static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } +static INLINE vfloat vposneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(PNMASKf))); } +static INLINE vfloat vnegpos_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(NPMASKf))); } + +#if CONFIG >= 3 +static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return _mm_addsub_pd(x, y); } +static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return 
_mm_addsub_ps(x, y); } +#else +static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return vadd_vd_vd_vd(x, vnegpos_vd_vd(y)); } +static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return vadd_vf_vf_vf(x, vnegpos_vf_vf(y)); } +#endif +static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsubadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } +static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsubadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } + +static INLINE vdouble vrev21_vd_vd(vdouble d0) { return _mm_shuffle_pd(d0, d0, 1); } +static INLINE vdouble vreva2_vd_vd(vdouble vd) { return vd; } + +static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { _mm_stream_pd(ptr, v); } +static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vstore_v_p_vd((double *)(&ptr[2*offset]), v); } +static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { _mm_stream_pd((double *)(&ptr[2*offset]), v); } + +// + +static INLINE vfloat vrev21_vf_vf(vfloat d0) { return _mm_shuffle_ps(d0, d0, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); } +static INLINE vfloat vreva2_vf_vf(vfloat d0) { return _mm_shuffle_ps(d0, d0, (1 << 6) | (0 << 4) | (3 << 2) | (2 << 0)); } +static INLINE vint2 vrev21_vi2_vi2(vint2 i) { return vreinterpret_vi2_vf(vrev21_vf_vf(vreinterpret_vf_vi2(i))); } + +static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { _mm_stream_ps(ptr, v); } + +static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { + _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); + _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); +} + +static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { + _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); + _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); +} + +// + +static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { + return (vmask2) { _mm_unpacklo_epi64(v.x, v.y), _mm_unpackhi_epi64(v.x, v.y) }; +} + +static INLINE vmask2 vuninterleave_vm2_vm2(vmask2 v) { + return (vmask2) { _mm_unpacklo_epi64(v.x, v.y), _mm_unpackhi_epi64(v.x, v.y) }; +} + +static INLINE vint vuninterleave_vi_vi(vint v) { return v; } +static INLINE vdouble vinterleave_vd_vd(vdouble vd) { return vd; } +static INLINE vdouble vuninterleave_vd_vd(vdouble vd) { return vd; } +static INLINE vmask vinterleave_vm_vm(vmask vm) { return vm; } +static INLINE vmask vuninterleave_vm_vm(vmask vm) { return vm; } + +static vmask2 vloadu_vm2_p(void *p) { + vmask2 vm2; + memcpy(&vm2, p, VECTLENDP * 16); + return vm2; +} + +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad2 vargquad; + +static INLINE vmask2 vcast_vm2_aq(vargquad aq) { + return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); +} + +static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { + vm2 = vuninterleave_vm2_vm2(vm2); + vargquad aq; + memcpy(&aq, &vm2, VECTLENDP * 16); + return aq; +} +#endif // #if !defined(SLEEF_GENHEADER) + +static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0; } + +static INLINE vmask vsel_vm_vo64_vm_vm(vopmask o, vmask x, vmask y) { + return vor_vm_vm_vm(vand_vm_vm_vm(o, x), vandnot_vm_vm_vm(o, y)); +} + +static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) { return _mm_sub_epi64(x, y); } +static INLINE vmask vneg64_vm_vm(vmask x) { return 
_mm_sub_epi64(vcast_vm_i_i(0, 0), x); } + +#define vsll64_vm_vm_i(x, c) _mm_slli_epi64(x, c) +#define vsrl64_vm_vm_i(x, c) _mm_srli_epi64(x, c) +//@#define vsll64_vm_vm_i(x, c) _mm_slli_epi64(x, c) +//@#define vsrl64_vm_vm_i(x, c) _mm_srli_epi64(x, c) + +static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { + int64_t ax[2], ay[2]; + _mm_storeu_si128((__m128i *)ax, x); + _mm_storeu_si128((__m128i *)ay, y); + return _mm_set_epi64x(ax[1] > ay[1] ? -1 : 0, ax[0] > ay[0] ? -1 : 0); +} + +static INLINE vmask vcast_vm_vi(vint vi) { + vmask m = _mm_and_si128(_mm_shuffle_epi32(vi, (0 << 6) | (1 << 4) | (0 << 2) | (0 << 0)), _mm_set_epi32(0, -1, 0, -1)); + return vor_vm_vm_vm(vcastu_vi2_vi(vgt_vo_vi_vi(vcast_vi_i(0), vi)), m); +} +static INLINE vint vcast_vi_vm(vmask vm) { return _mm_shuffle_epi32(vm, 0x08); } diff --git a/src/helpersve.h b/src/helpersve.h new file mode 100644 index 00000000..4aacab7b --- /dev/null +++ b/src/helpersve.h @@ -0,0 +1,1150 @@ +/*********************************************************************/ +/* Copyright ARM Ltd. 2010 - 2019. */ +/* Distributed under the Boost Software License, Version 1.0. */ +/* (See accompanying file LICENSE.txt or copy at */ +/* http://www.boost.org/LICENSE_1_0.txt) */ +/*********************************************************************/ + +#if !defined(__ARM_FEATURE_SVE) && !defined(SLEEF_GENHEADER) +#error Please specify SVE flags. +#endif + +#if !defined(SLEEF_GENHEADER) +#include <arm_sve.h> +#include <stdint.h> + +#include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) + +#if defined(VECTLENDP) || defined(VECTLENSP) +#error VECTLENDP or VECTLENSP already defined +#endif + +#if CONFIG == 1 || CONFIG == 2 +// Vector length agnostic +#define VECTLENSP (svcntw()) +//@#define VECTLENSP (svcntw()) +#define VECTLENDP (svcntd()) +//@#define VECTLENDP (svcntd()) +#define ISANAME "AArch64 SVE" +#define ptrue svptrue_b8() +//@#define ptrue svptrue_b8() +#elif CONFIG == 8 +// 256-bit vector length +#define ISANAME "AArch64 SVE 256-bit" +#define LOG2VECTLENDP 2 +#define ptrue svptrue_pat_b8(SV_VL32) +#define DFTPRIORITY 20 +#elif CONFIG == 9 +// 512-bit vector length +#define ISANAME "AArch64 SVE 512-bit" +#define LOG2VECTLENDP 3 +#define ptrue svptrue_pat_b8(SV_VL64) +#define DFTPRIORITY 21 +#elif CONFIG == 10 +// 1024-bit vector length +#define ISANAME "AArch64 SVE 1024-bit" +#define LOG2VECTLENDP 4 +#define ptrue svptrue_pat_b8(SV_VL128) +#define DFTPRIORITY 22 +#elif CONFIG == 11 +// 2048-bit vector length +#define ISANAME "AArch64 SVE 2048-bit" +#define LOG2VECTLENDP 5 +#define ptrue svptrue_pat_b8(SV_VL256) +#define DFTPRIORITY 23 +#else +#error CONFIG macro invalid or not defined +#endif + +#ifdef LOG2VECTLENDP +// For DFT, VECTLENDP and VECTLENSP are not the size of the available +// vector length, but the size of the partial vectors utilized in the +// computation. The appropriate VECTLENDP and VECTLENSP are chosen by +// the dispatcher according to the value of svcntd(). + +#define LOG2VECTLENSP (LOG2VECTLENDP+1) +#define VECTLENDP (1 << LOG2VECTLENDP) +#define VECTLENSP (1 << LOG2VECTLENSP) +static INLINE int vavailability_i(int name) { return svcntd() >= VECTLENDP ? 3 : 0; } +#else +static INLINE int vavailability_i(int name) { return 3; } +#endif + +#define ENABLE_SP +//@#define ENABLE_SP +#define ENABLE_DP +//@#define ENABLE_DP + +#if CONFIG != 2 +#define ENABLE_FMA_SP +//@#define ENABLE_FMA_SP +#define ENABLE_FMA_DP +//@#define ENABLE_FMA_DP +//#define SPLIT_KERNEL // Benchmark comparison is needed to determine whether this option should be enabled.
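// Note on the CONFIG scheme above: CONFIG 1 and 2 build the
// vector-length-agnostic variant, with CONFIG 2 leaving the ENABLE_FMA_*
// macros undefined (this #if); CONFIG 8/9/10/11 pin the partial vector
// width to 256/512/1024/2048 bits so the DFT dispatcher can select a
// kernel that matches the runtime svcntd().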
+#endif + +#define FULL_FP_ROUNDING +//@#define FULL_FP_ROUNDING +#define ACCURATE_SQRT +//@#define ACCURATE_SQRT + +// Type definitions + +// Mask definition +typedef svint32_t vmask; +typedef svbool_t vopmask; + +// Single precision definitions +typedef svfloat32_t vfloat; +typedef svint32_t vint2; + +// Double precision definitions +typedef svfloat64_t vdouble; +typedef svint32_t vint; + +// Double-double data type with setter/getter functions +typedef svfloat64x2_t vdouble2; +static INLINE vdouble vd2getx_vd_vd2(vdouble2 v) { return svget2_f64(v, 0); } +static INLINE vdouble vd2gety_vd_vd2(vdouble2 v) { return svget2_f64(v, 1); } +static INLINE vdouble2 vd2setxy_vd2_vd_vd(vdouble x, vdouble y) { return svcreate2_f64(x, y); } +static INLINE vdouble2 vd2setx_vd2_vd2_vd(vdouble2 v, vdouble d) { return svset2_f64(v, 0, d); } +static INLINE vdouble2 vd2sety_vd2_vd2_vd(vdouble2 v, vdouble d) { return svset2_f64(v, 1, d); } + +// Double-float data type with setter/getter functions +typedef svfloat32x2_t vfloat2; +static INLINE vfloat vf2getx_vf_vf2(vfloat2 v) { return svget2_f32(v, 0); } +static INLINE vfloat vf2gety_vf_vf2(vfloat2 v) { return svget2_f32(v, 1); } +static INLINE vfloat2 vf2setxy_vf2_vf_vf(vfloat x, vfloat y) { return svcreate2_f32(x, y); } +static INLINE vfloat2 vf2setx_vf2_vf2_vf(vfloat2 v, vfloat d) { return svset2_f32(v, 0, d); } +static INLINE vfloat2 vf2sety_vf2_vf2_vf(vfloat2 v, vfloat d) { return svset2_f32(v, 1, d); } + +// vmask2 is mainly used for quad-precision functions +typedef svint32x2_t vmask2; +static INLINE vmask vm2getx_vm_vm2(vmask2 v) { return svget2_s32(v, 0); } +static INLINE vmask vm2gety_vm_vm2(vmask2 v) { return svget2_s32(v, 1); } +static INLINE vmask2 vm2setxy_vm2_vm_vm(vmask x, vmask y) { return svcreate2_s32(x, y); } +static INLINE vmask2 vm2setx_vm2_vm2_vm(vmask2 v, vmask x) { return svset2_s32(v, 0, x); } +static INLINE vmask2 vm2sety_vm2_vm2_vm(vmask2 v, vmask y) { return svset2_s32(v, 1, y); } + +// Auxiliary data types + +typedef svfloat64x2_t di_t; + +static INLINE vdouble digetd_vd_di(di_t d) { return svget2_f64(d, 0); } +static INLINE vint digeti_vi_di(di_t d) { return svreinterpret_s32_f64(svget2_f64(d, 1)); } +static INLINE di_t disetdi_di_vd_vi(vdouble d, vint i) { + return svcreate2_f64(d, svreinterpret_f64_s32(i)); +} + +// + +typedef svfloat32x2_t fi_t; + +static INLINE vfloat figetd_vf_di(fi_t d) { return svget2_f32(d, 0); } +static INLINE vint2 figeti_vi2_di(fi_t d) { return svreinterpret_s32_f32(svget2_f32(d, 1)); } +static INLINE fi_t fisetdi_fi_vf_vi2(vfloat d, vint2 i) { + return svcreate2_f32(d, svreinterpret_f32_s32(i)); +} + +// + +typedef svfloat64x3_t ddi_t; + +static INLINE vdouble2 ddigetdd_vd2_ddi(ddi_t d) { + return svcreate2_f64(svget3_f64(d, 0), svget3_f64(d, 1)); +} +static INLINE vint ddigeti_vi_ddi(ddi_t d) { return svreinterpret_s32_f64(svget3_f64(d, 2)); } +static INLINE ddi_t ddisetddi_ddi_vd2_vi(vdouble2 v, vint i) { + return svcreate3_f64(svget2_f64(v, 0), svget2_f64(v, 1), + svreinterpret_f64_s32(i)); +} +static INLINE ddi_t ddisetdd_ddi_ddi_vd2(ddi_t ddi, vdouble2 v) { + return svcreate3_f64(svget2_f64(v, 0), svget2_f64(v, 1), svget3_f64(ddi, 2)); +} + +// + +typedef svfloat32x3_t dfi_t; + +static INLINE vfloat2 dfigetdf_vf2_dfi(dfi_t d) { + return svcreate2_f32(svget3_f32(d, 0), svget3_f32(d, 1)); +} +static INLINE vint2 dfigeti_vi2_dfi(dfi_t d) { return svreinterpret_s32_f32(svget3_f32(d, 2)); } +static INLINE dfi_t dfisetdfi_dfi_vf2_vi2(vfloat2 v, vint2 i) { + return svcreate3_f32(svget2_f32(v, 0), 
svget2_f32(v, 1), + svreinterpret_f32_s32(i)); +} +static INLINE dfi_t dfisetdf_dfi_dfi_vf2(dfi_t dfi, vfloat2 v) { + return svcreate3_f32(svget2_f32(v, 0), svget2_f32(v, 1), svget3_f32(dfi, 2)); +} + +// + +typedef svfloat64x4_t dd2; + +static INLINE dd2 dd2setab_dd2_vd2_vd2(vdouble2 a, vdouble2 b) { + return svcreate4_f64(svget2_f64(a, 0), svget2_f64(a, 1), + svget2_f64(b, 0), svget2_f64(b, 1)); +} +static INLINE vdouble2 dd2geta_vd2_dd2(dd2 d) { + return svcreate2_f64(svget4_f64(d, 0), svget4_f64(d, 1)); +} +static INLINE vdouble2 dd2getb_vd2_dd2(dd2 d) { + return svcreate2_f64(svget4_f64(d, 2), svget4_f64(d, 3)); +} + +// + +typedef svfloat32x4_t df2; + +static INLINE df2 df2setab_df2_vf2_vf2(vfloat2 a, vfloat2 b) { + return svcreate4_f32(svget2_f32(a, 0), svget2_f32(a, 1), + svget2_f32(b, 0), svget2_f32(b, 1)); +} +static INLINE vfloat2 df2geta_vf2_df2(df2 d) { + return svcreate2_f32(svget4_f32(d, 0), svget4_f32(d, 1)); +} +static INLINE vfloat2 df2getb_vf2_df2(df2 d) { + return svcreate2_f32(svget4_f32(d, 2), svget4_f32(d, 3)); +} + +// + +typedef svfloat64x3_t vdouble3; + +static INLINE vdouble vd3getx_vd_vd3(vdouble3 v) { return svget3_f64(v, 0); } +static INLINE vdouble vd3gety_vd_vd3(vdouble3 v) { return svget3_f64(v, 1); } +static INLINE vdouble vd3getz_vd_vd3(vdouble3 v) { return svget3_f64(v, 2); } +static INLINE vdouble3 vd3setxyz_vd3_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return svcreate3_f64(x, y, z); } +static INLINE vdouble3 vd3setx_vd3_vd3_vd(vdouble3 v, vdouble d) { return svset3_f64(v, 0, d); } +static INLINE vdouble3 vd3sety_vd3_vd3_vd(vdouble3 v, vdouble d) { return svset3_f64(v, 1, d); } +static INLINE vdouble3 vd3setz_vd3_vd3_vd(vdouble3 v, vdouble d) { return svset3_f64(v, 2, d); } + +// + +typedef svfloat64x4_t tdx; + +static INLINE vmask tdxgete_vm_tdx(tdx t) { + return svreinterpret_s32_f64(svget4_f64(t, 0)); +} +static INLINE vdouble3 tdxgetd3_vd3_tdx(tdx t) { + return svcreate3_f64(svget4_f64(t, 1), svget4_f64(t, 2), svget4_f64(t, 3)); +} +static INLINE vdouble tdxgetd3x_vd_tdx(tdx t) { return svget4_f64(t, 1); } +static INLINE vdouble tdxgetd3y_vd_tdx(tdx t) { return svget4_f64(t, 2); } +static INLINE vdouble tdxgetd3z_vd_tdx(tdx t) { return svget4_f64(t, 3); } +static INLINE tdx tdxsete_tdx_tdx_vm(tdx t, vmask e) { + return svset4_f64(t, 0, svreinterpret_f64_s32(e)); +} +static INLINE tdx tdxsetd3_tdx_tdx_vd3(tdx t, vdouble3 d3) { + return svcreate4_f64(svget4_f64(t, 0), svget3_f64(d3, 0), svget3_f64(d3, 1), svget3_f64(d3, 2)); +} +static INLINE tdx tdxsetx_tdx_tdx_vd(tdx t, vdouble x) { return svset4_f64(t, 1, x); } +static INLINE tdx tdxsety_tdx_tdx_vd(tdx t, vdouble y) { return svset4_f64(t, 2, y); } +static INLINE tdx tdxsetz_tdx_tdx_vd(tdx t, vdouble z) { return svset4_f64(t, 3, z); } +static INLINE tdx tdxsetxyz_tdx_tdx_vd_vd_vd(tdx t, vdouble x, vdouble y, vdouble z) { + return svcreate4_f64(svget4_f64(t, 0), x, y, z); +} + +static INLINE tdx tdxseted3_tdx_vm_vd3(vmask e, vdouble3 d3) { + return svcreate4_f64(svreinterpret_f64_s32(e), svget3_f64(d3, 0), svget3_f64(d3, 1), svget3_f64(d3, 2)); +} +static INLINE tdx tdxsetexyz_tdx_vm_vd_vd_vd(vmask e, vdouble x, vdouble y, vdouble z) { + return svcreate4_f64(svreinterpret_f64_s32(e), x, y, z); +} + +// + +typedef svfloat64x4_t tdi_t; + +static INLINE vdouble3 tdigettd_vd3_tdi(tdi_t d) { + return svcreate3_f64(svget4_f64(d, 0), svget4_f64(d, 1), svget4_f64(d, 2)); +} +static INLINE vdouble tdigetx_vd_tdi(tdi_t d) { return svget4_f64(d, 0); } +static INLINE vint tdigeti_vi_tdi(tdi_t d) { return 
svreinterpret_s32_f64(svget4_f64(d, 3)); } +static INLINE tdi_t tdisettdi_tdi_vd3_vi(vdouble3 v, vint i) { + return svcreate4_f64(svget3_f64(v, 0), svget3_f64(v, 1), svget3_f64(v, 2), + svreinterpret_f64_s32(i)); +} +static INLINE tdi_t tdisettd_tdi_tdi_vd3(tdi_t tdi, vdouble3 v) { + return svcreate4_f64(svget3_f64(v, 0), svget3_f64(v, 1), svget3_f64(v, 2), svget4_f64(tdi, 3)); +} + +// + +// masking predicates +#define ALL_TRUE_MASK svdup_n_s32(0xffffffff) +#define ALL_FALSE_MASK svdup_n_s32(0x0) +//@#define ALL_TRUE_MASK svdup_n_s32(0xffffffff) +//@#define ALL_FALSE_MASK svdup_n_s32(0x0) + +static INLINE void vprefetch_v_p(const void *ptr) {} + +// +// +// +// Test if all lanes are active +// +// +// +static INLINE int vtestallones_i_vo32(vopmask g) { + svbool_t pg = svptrue_b32(); + return (svcntp_b32(pg, g) == svcntw()); +} + +static INLINE int vtestallones_i_vo64(vopmask g) { + svbool_t pg = svptrue_b64(); + return (svcntp_b64(pg, g) == svcntd()); +} +// +// +// +// +// +// + +// Vector load / store +static INLINE void vstoreu_v_p_vi2(int32_t *p, vint2 v) { svst1_s32(ptrue, p, v); } + +static INLINE vfloat vload_vf_p(const float *ptr) { + return svld1_f32(ptrue, ptr); +} +static INLINE vfloat vloadu_vf_p(const float *ptr) { + return svld1_f32(ptrue, ptr); +} +static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { + svst1_f32(ptrue, ptr, v); +} + +// Basic logical operations for mask +static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { + return svand_s32_x(ptrue, x, y); +} +static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { + return svbic_s32_x(ptrue, y, x); +} +static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { + return svorr_s32_x(ptrue, x, y); +} +static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { + return sveor_s32_x(ptrue, x, y); +} + +static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { + return svreinterpret_s32_s64( + svadd_s64_x(ptrue, svreinterpret_s64_s32(x), + svreinterpret_s64_s32(y))); +} + +// Mask <--> single precision reinterpret +static INLINE vmask vreinterpret_vm_vf(vfloat vf) { + return svreinterpret_s32_f32(vf); +} +static INLINE vfloat vreinterpret_vf_vm(vmask vm) { + return svreinterpret_f32_s32(vm); +} +static INLINE vfloat vreinterpret_vf_vi2(vint2 vm) { + return svreinterpret_f32_s32(vm); +} +static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { + return svreinterpret_s32_f32(vf); +} +static INLINE vint2 vcast_vi2_vm(vmask vm) { return vm; } +static INLINE vmask vcast_vm_vi2(vint2 vi) { return vi; } + +// Conditional select +static INLINE vint2 vsel_vi2_vm_vi2_vi2(vmask m, vint2 x, vint2 y) { + return svsel_s32(svcmpeq_s32(ptrue, m, ALL_TRUE_MASK), x, y); +} + +/****************************************/ +/* Single precision FP operations */ +/****************************************/ +// Broadcast +static INLINE vfloat vcast_vf_f(float f) { return svdup_n_f32(f); } + +// Add, Sub, Mul +static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { + return svadd_f32_x(ptrue, x, y); +} +static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { + return svsub_f32_x(ptrue, x, y); +} +static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { + return svmul_f32_x(ptrue, x, y); +} + +// |x|, -x +static INLINE vfloat vabs_vf_vf(vfloat f) { return svabs_f32_x(ptrue, f); } +static INLINE vfloat vneg_vf_vf(vfloat f) { return svneg_f32_x(ptrue, f); } + +// max, min +static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { + return svmax_f32_x(ptrue, x, y); +} +static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { + return svmin_f32_x(ptrue, x, y); +} + +// 
int <--> float conversions +static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { + return svcvt_s32_f32_x(ptrue, vf); +} +static INLINE vfloat vcast_vf_vi2(vint2 vi) { + return svcvt_f32_s32_x(ptrue, vi); +} +static INLINE vint2 vcast_vi2_i(int i) { return svdup_n_s32(i); } +static INLINE vint2 vrint_vi2_vf(vfloat d) { + return svcvt_s32_f32_x(ptrue, svrintn_f32_x(ptrue, d)); +} + +#if CONFIG == 1 +// Multiply accumulate: z = z + x * y +static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { + return svmad_f32_x(ptrue, x, y, z); +} +// Multiply subtract: z = z - x * y +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { + return svmsb_f32_x(ptrue, x, y, z); +} +static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { + return svnmsb_f32_x(ptrue, x, y, z); +} +#else +static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); } +static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } +#endif + +// fused multiply add / sub +static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, + vfloat z) { // z + x * y + return svmad_f32_x(ptrue, x, y, z); +} +static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, + vfloat z) { // z - x * y + return svmsb_f32_x(ptrue, x, y, z); +} +static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, + vfloat z) { // x * y - z + return svnmsb_f32_x(ptrue, x, y, z); +} + +// conditional select +static INLINE vfloat vsel_vf_vo_vf_vf(vopmask mask, vfloat x, vfloat y) { + return svsel_f32(mask, x, y); +} + +// Reciprocal 1/x, Division, Square root +static INLINE vfloat vdiv_vf_vf_vf(vfloat n, vfloat d) { +#ifndef ENABLE_ALTDIV + return svdiv_f32_x(ptrue, n, d); +#else + // Finite numbers (including denormal) only, gives mostly correctly rounded result + vfloat t, u, x, y; + svuint32_t i0, i1; + i0 = svand_u32_x(ptrue, svreinterpret_u32_f32(n), svdup_n_u32(0x7c000000)); + i1 = svand_u32_x(ptrue, svreinterpret_u32_f32(d), svdup_n_u32(0x7c000000)); + i0 = svsub_u32_x(ptrue, svdup_n_u32(0x7d000000), svlsr_n_u32_x(ptrue, svadd_u32_x(ptrue, i0, i1), 1)); + t = svreinterpret_f32_u32(i0); + y = svmul_f32_x(ptrue, d, t); + x = svmul_f32_x(ptrue, n, t); + t = svrecpe_f32(y); + t = svmul_f32_x(ptrue, t, svrecps_f32(y, t)); + t = svmul_f32_x(ptrue, t, svrecps_f32(y, t)); + u = svmul_f32_x(ptrue, x, t); + u = svmad_f32_x(ptrue, svmsb_f32_x(ptrue, y, u, x), t, u); + return u; +#endif +} +static INLINE vfloat vrec_vf_vf(vfloat d) { +#ifndef ENABLE_ALTDIV + return svdivr_n_f32_x(ptrue, d, 1.0f); +#else + return vsel_vf_vo_vf_vf(svcmpeq_f32(ptrue, vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)), + vcast_vf_f(0), vdiv_vf_vf_vf(vcast_vf_f(1.0f), d)); +#endif +} +static INLINE vfloat vsqrt_vf_vf(vfloat d) { +#ifndef ENABLE_ALTSQRT + return svsqrt_f32_x(ptrue, d); +#else + // Gives correctly rounded result for all input range + vfloat w, x, y, z; + + y = svrsqrte_f32(d); + x = vmul_vf_vf_vf(d, y); w = vmul_vf_vf_vf(vcast_vf_f(0.5), y); + y = vfmanp_vf_vf_vf_vf(x, w, vcast_vf_f(0.5)); + x = vfma_vf_vf_vf_vf(x, y, x); w = vfma_vf_vf_vf_vf(w, y, w); + + y = vfmanp_vf_vf_vf_vf(x, w, vcast_vf_f(1.5)); w = vadd_vf_vf_vf(w, w); + w = vmul_vf_vf_vf(w, y); + x = vmul_vf_vf_vf(w, d); + y = vfmapn_vf_vf_vf_vf(w, d, x); z = vfmanp_vf_vf_vf_vf(w, x, vcast_vf_f(1)); + z = vfmanp_vf_vf_vf_vf(w, y, z); w = 
vmul_vf_vf_vf(vcast_vf_f(0.5), x); + w = vfma_vf_vf_vf_vf(w, z, y); + w = vadd_vf_vf_vf(w, x); + + return svsel_f32(svorr_b_z(ptrue, svcmpeq_f32(ptrue, d, vcast_vf_f(0)), + svcmpeq_f32(ptrue, d, vcast_vf_f(SLEEF_INFINITYf))), d, w); +#endif +} +// +// +// +// +// +// +static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { + return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); +} + +static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { + return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); +} + +static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { + return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); +} +// +// +// +// +// +// + +// truncate +static INLINE vfloat vtruncate_vf_vf(vfloat vd) { + return svrintz_f32_x(ptrue, vd); +} + +// +// +// +// Round float +// +// +// +static INLINE vfloat vrint_vf_vf(vfloat vf) { + return svrintn_f32_x(svptrue_b32(), vf); +} +// +// +// +// +// +// + +/***************************************/ +/* Single precision integer operations */ +/***************************************/ + +// Add, Sub, Neg (-x) +static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { + return svadd_s32_x(ptrue, x, y); +} +static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { + return svsub_s32_x(ptrue, x, y); +} +static INLINE vint2 vneg_vi2_vi2(vint2 e) { return svneg_s32_x(ptrue, e); } + +// Logical operations +static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { + return svand_s32_x(ptrue, x, y); +} +static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { + return svbic_s32_x(ptrue, y, x); +} +static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { + return svorr_s32_x(ptrue, x, y); +} +static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { + return sveor_s32_x(ptrue, x, y); +} + +// Shifts +#define vsll_vi2_vi2_i(x, c) svlsl_n_s32_x(ptrue, x, c) +//@#define vsll_vi2_vi2_i(x, c) svlsl_n_s32_x(ptrue, x, c) +#define vsrl_vi2_vi2_i(x, c) \ + svreinterpret_s32_u32(svlsr_n_u32_x(ptrue, svreinterpret_u32_s32(x), c)) +//@#define vsrl_vi2_vi2_i(x, c) svreinterpret_s32_u32(svlsr_n_u32_x(ptrue, svreinterpret_u32_s32(x), c)) +#define vsra_vi2_vi2_i(x, c) svasr_n_s32_x(ptrue, x, c) +//@#define vsra_vi2_vi2_i(x, c) svasr_n_s32_x(ptrue, x, c) + +// Comparison returning integers +static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { + return svsel_s32(svcmpgt_s32(ptrue, x, y), ALL_TRUE_MASK, ALL_FALSE_MASK); +} + +// conditional select +static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { + return svsel_s32(m, x, y); +} + +/****************************************/ +/* opmask operations */ +/****************************************/ +// single precision FP +static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { + return svcmpeq_f32(ptrue, x, y); +} +static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { + return svcmpne_f32(ptrue, x, y); +} +static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { + return svcmplt_f32(ptrue, x, y); +} +static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { + return svcmple_f32(ptrue, x, y); +} +static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { + return svcmpgt_f32(ptrue, x, y); +} +static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { + return svcmpge_f32(ptrue, x, y); +} +static INLINE vopmask visinf_vo_vf(vfloat d) { + return svcmpeq_n_f32(ptrue, vabs_vf_vf(d), SLEEF_INFINITYf); +} +static 
INLINE vopmask vispinf_vo_vf(vfloat d) { + return svcmpeq_n_f32(ptrue, d, SLEEF_INFINITYf); +} +static INLINE vopmask visminf_vo_vf(vfloat d) { + return svcmpeq_n_f32(ptrue, d, -SLEEF_INFINITYf); +} +static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } + +// integers +static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { + return svcmpeq_s32(ptrue, x, y); +} +static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { + return svcmpgt_s32(ptrue, x, y); +} + +// logical opmask +static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { + return svand_b_z(ptrue, x, y); +} +static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { + return svbic_b_z(ptrue, y, x); +} +static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { + return svorr_b_z(ptrue, x, y); +} +static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { + return sveor_b_z(ptrue, x, y); +} + +static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { + // This needs to be zeroing to prevent asinf and atanf denormal test + // failing. + return svand_s32_z(x, y, y); +} + +// bitmask logical operations +static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { + return svsel_s32(x, y, ALL_FALSE_MASK); +} +static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { + return svsel_s32(x, ALL_FALSE_MASK, y); +} +static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { + return svsel_s32(x, ALL_TRUE_MASK, y); +} + +// broadcast bitmask +static INLINE vmask vcast_vm_i_i(int i0, int i1) { + return svreinterpret_s32_u64( + svdup_n_u64((0xffffffff & (uint64_t)i1) | (((uint64_t)i0) << 32))); +} + +/*********************************/ +/* SVE for double precision math */ +/*********************************/ + +// Vector load/store +static INLINE vdouble vload_vd_p(const double *ptr) { + return svld1_f64(ptrue, ptr); +} +static INLINE vdouble vloadu_vd_p(const double *ptr) { + return svld1_f64(ptrue, ptr); +} +static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { + svst1_f64(ptrue, ptr, v); +} + +static INLINE void vstoreu_v_p_vi(int *ptr, vint v) { + svst1w_s64(ptrue, ptr, svreinterpret_s64_s32(v)); +} +static vint vloadu_vi_p(int32_t *p) { + return svreinterpret_s32_s64(svld1uw_s64(ptrue, (uint32_t *)p)); +} + +// Reinterpret +static INLINE vdouble vreinterpret_vd_vm(vmask vm) { + return svreinterpret_f64_s32(vm); +} +static INLINE vmask vreinterpret_vm_vd(vdouble vd) { + return svreinterpret_s32_f64(vd); +} +static INLINE vdouble vreinterpret_vd_vi2(vint2 x) { + return svreinterpret_f64_s32(x); +} +static INLINE vint2 vreinterpret_vi2_vd(vdouble x) { + return svreinterpret_s32_f64(x); +} +static INLINE vint2 vcastu_vi2_vi(vint x) { + return svreinterpret_s32_s64( + svlsl_n_s64_x(ptrue, svreinterpret_s64_s32(x), 32)); +} +static INLINE vint vcastu_vi_vi2(vint2 x) { + return svreinterpret_s32_u64( + svlsr_n_u64_x(ptrue, svreinterpret_u64_s32(x), 32)); +} +static INLINE vdouble vcast_vd_vi(vint vi) { + return svcvt_f64_s32_x(ptrue, vi); +} + +// Splat +static INLINE vdouble vcast_vd_d(double d) { return svdup_n_f64(d); } + +// Conditional select +static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, vdouble x, vdouble y) { + return svsel_f64(o, x, y); +} + +static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { + return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0)); +} + +static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { + return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2)); +} + +static INLINE vdouble 
vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { + return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3))); +} + +static INLINE vint vsel_vi_vo_vi_vi(vopmask o, vint x, vint y) { + return svsel_s32(o, x, y); +} +// truncate +static INLINE vdouble vtruncate_vd_vd(vdouble vd) { + return svrintz_f64_x(ptrue, vd); +} +static INLINE vint vtruncate_vi_vd(vdouble vd) { + return svcvt_s32_f64_x(ptrue, vd); +} +static INLINE vint vrint_vi_vd(vdouble vd) { + return svcvt_s32_f64_x(ptrue, svrintn_f64_x(ptrue, vd)); +} +static INLINE vdouble vrint_vd_vd(vdouble vd) { + return svrintn_f64_x(ptrue, vd); +} + +// FP math operations +static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { + return svadd_f64_x(ptrue, x, y); +} +static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { + return svsub_f64_x(ptrue, x, y); +} +static INLINE vdouble vneg_vd_vd(vdouble x) { return svneg_f64_x(ptrue, x); } +static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { + return svmul_f64_x(ptrue, x, y); +} +static INLINE vdouble vabs_vd_vd(vdouble x) { return svabs_f64_x(ptrue, x); } +static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { + return svmax_f64_x(ptrue, x, y); +} +static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { + return svmin_f64_x(ptrue, x, y); +} + +#if CONFIG == 1 +// Multiply accumulate / subtract +static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, + vdouble z) { // z = x*y + z + return svmad_f64_x(ptrue, x, y, z); +} +static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, + vdouble z) { // z = x * y - z + return svnmsb_f64_x(ptrue, x, y, z); +} +static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return svmsb_f64_x(ptrue, x, y, z); +} +#else +static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } +static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } +#endif + +static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, + vdouble z) { // z + x * y + return svmad_f64_x(ptrue, x, y, z); +} +static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, + vdouble z) { // z - x * y + return svmsb_f64_x(ptrue, x, y, z); +} +static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, + vdouble z) { // x * y - z + return svnmsb_f64_x(ptrue, x, y, z); +} + +// Reciprocal 1/x, Division, Square root +static INLINE vdouble vdiv_vd_vd_vd(vdouble n, vdouble d) { +#ifndef ENABLE_ALTDIV + return svdiv_f64_x(ptrue, n, d); +#else + // Finite numbers (including denormal) only, gives mostly correctly rounded result + vdouble t, u, x, y; + svuint64_t i0, i1; + i0 = svand_u64_x(ptrue, svreinterpret_u64_f64(n), svdup_n_u64(0x7fc0000000000000L)); + i1 = svand_u64_x(ptrue, svreinterpret_u64_f64(d), svdup_n_u64(0x7fc0000000000000L)); + i0 = svsub_u64_x(ptrue, svdup_n_u64(0x7fd0000000000000L), svlsr_n_u64_x(ptrue, svadd_u64_x(ptrue, i0, i1), 1)); + t = svreinterpret_f64_u64(i0); + y = svmul_f64_x(ptrue, d, t); + x = svmul_f64_x(ptrue, n, t); + t = svrecpe_f64(y); + t = svmul_f64_x(ptrue, t, svrecps_f64(y, t)); + t = svmul_f64_x(ptrue, t, svrecps_f64(y, t)); + t = svmul_f64_x(ptrue, t, svrecps_f64(y, t)); + u = svmul_f64_x(ptrue, x, t); + u = svmad_f64_x(ptrue, svmsb_f64_x(ptrue, y, u, x), t, u); + return u; +#endif +} +static INLINE vdouble vrec_vd_vd(vdouble d) { +#ifndef ENABLE_ALTDIV + return 
svdivr_n_f64_x(ptrue, d, 1.0); +#else + return vsel_vd_vo_vd_vd(svcmpeq_f64(ptrue, vabs_vd_vd(d), vcast_vd_d(SLEEF_INFINITY)), + vcast_vd_d(0), vdiv_vd_vd_vd(vcast_vd_d(1.0f), d)); +#endif +} +static INLINE vdouble vsqrt_vd_vd(vdouble d) { +#ifndef ENABLE_ALTSQRT + return svsqrt_f64_x(ptrue, d); +#else + // Gives correctly rounded result for all input range + vdouble w, x, y, z; + + y = svrsqrte_f64(d); + x = vmul_vd_vd_vd(d, y); w = vmul_vd_vd_vd(vcast_vd_d(0.5), y); + y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(0.5)); + x = vfma_vd_vd_vd_vd(x, y, x); w = vfma_vd_vd_vd_vd(w, y, w); + y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(0.5)); + x = vfma_vd_vd_vd_vd(x, y, x); w = vfma_vd_vd_vd_vd(w, y, w); + + y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(1.5)); w = vadd_vd_vd_vd(w, w); + w = vmul_vd_vd_vd(w, y); + x = vmul_vd_vd_vd(w, d); + y = vfmapn_vd_vd_vd_vd(w, d, x); z = vfmanp_vd_vd_vd_vd(w, x, vcast_vd_d(1)); + z = vfmanp_vd_vd_vd_vd(w, y, z); w = vmul_vd_vd_vd(vcast_vd_d(0.5), x); + w = vfma_vd_vd_vd_vd(w, z, y); + w = vadd_vd_vd_vd(w, x); + + return svsel_f64(svorr_b_z(ptrue, svcmpeq_f64(ptrue, d, vcast_vd_d(0)), + svcmpeq_f64(ptrue, d, vcast_vd_d(SLEEF_INFINITY))), d, w); +#endif +} + +// Float comparison +static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { + return svcmplt_f64(ptrue, x, y); +} +static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { + return svcmpeq_f64(ptrue, x, y); +} +static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { + return svcmpgt_f64(ptrue, x, y); +} +static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { + return svcmpge_f64(ptrue, x, y); +} +static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { + return svcmpne_f64(ptrue, x, y); +} +static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { + return svcmple_f64(ptrue, x, y); +} + +// predicates +static INLINE vopmask visnan_vo_vd(vdouble vd) { + return svcmpne_f64(ptrue, vd, vd); +} +static INLINE vopmask visinf_vo_vd(vdouble vd) { + return svcmpeq_n_f64(ptrue, svabs_f64_x(ptrue, vd), SLEEF_INFINITY); +} +static INLINE vopmask vispinf_vo_vd(vdouble vd) { + return svcmpeq_n_f64(ptrue, vd, SLEEF_INFINITY); +} +static INLINE vopmask visminf_vo_vd(vdouble vd) { + return svcmpeq_n_f64(ptrue, vd, -SLEEF_INFINITY); +} + +// Comparing bit masks +static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { + return svcmpeq_s64(ptrue, svreinterpret_s64_s32(x), svreinterpret_s64_s32(y)); +} + +// pure predicate operations +static INLINE vopmask vcast_vo32_vo64(vopmask o) { return o; } +static INLINE vopmask vcast_vo64_vo32(vopmask o) { return o; } + +// logical integer operations +static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { + // This needs to be a zeroing instruction because we need to make + // sure that the inactive elements for the unpacked integers vector + // are zero. 
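+  // (In ACLE terms, the _z suffix selects zeroing predication; the _x
+  // suffix would leave the inactive lanes undefined.)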
+ return svand_s32_z(x, y, y); +} + +static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { + return svsel_s32(x, ALL_FALSE_MASK, y); +} +#define vsra_vi_vi_i(x, c) svasr_n_s32_x(ptrue, x, c) +//@#define vsra_vi_vi_i(x, c) svasr_n_s32_x(ptrue, x, c) +#define vsll_vi_vi_i(x, c) svlsl_n_s32_x(ptrue, x, c) +//@#define vsll_vi_vi_i(x, c) svlsl_n_s32_x(ptrue, x, c) + +static INLINE vint vsrl_vi_vi_i(vint x, int c) { + return svreinterpret_s32_u32(svlsr_n_u32_x(ptrue, svreinterpret_u32_s32(x), c)); +} + +static INLINE vint vand_vi_vi_vi(vint x, vint y) { + return svand_s32_x(ptrue, x, y); +} +static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { + return svbic_s32_x(ptrue, y, x); +} +static INLINE vint vxor_vi_vi_vi(vint x, vint y) { + return sveor_s32_x(ptrue, x, y); +} + +// integer math +static INLINE vint vadd_vi_vi_vi(vint x, vint y) { + return svadd_s32_x(ptrue, x, y); +} +static INLINE vint vsub_vi_vi_vi(vint x, vint y) { + return svsub_s32_x(ptrue, x, y); +} +static INLINE vint vneg_vi_vi(vint x) { return svneg_s32_x(ptrue, x); } + +// integer comparison +static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { + return svcmpgt_s32(ptrue, x, y); +} +static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { + return svcmpeq_s32(ptrue, x, y); +} + +// Splat +static INLINE vint vcast_vi_i(int i) { return svdup_n_s32(i); } + +// bitmask logical operations +static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { + // This needs to be a zeroing instruction because we need to make + // sure that the inactive elements for the unpacked integers vector + // are zero. + return svreinterpret_s32_s64( + svand_s64_z(x, svreinterpret_s64_s32(y), svreinterpret_s64_s32(y))); +} +static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { + return svreinterpret_s32_s64(svsel_s64( + x, svreinterpret_s64_s32(ALL_FALSE_MASK), svreinterpret_s64_s32(y))); +} +static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { + return svreinterpret_s32_s64(svsel_s64( + x, svreinterpret_s64_s32(ALL_TRUE_MASK), svreinterpret_s64_s32(y))); +} + +static INLINE vfloat vrev21_vf_vf(vfloat vf) { + return svreinterpret_f32_u64(svrevw_u64_x(ptrue, svreinterpret_u64_f32(vf))); +} + +static INLINE vint2 vrev21_vi2_vi2(vint2 i) { return vreinterpret_vi2_vf(vrev21_vf_vf(vreinterpret_vf_vi2(i))); } + +// Comparison returning integer +static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { + return svsel_s32(svcmpeq_s32(ptrue, x, y), ALL_TRUE_MASK, ALL_FALSE_MASK); +} + +// Gather + +static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { + return svld1_gather_s64index_f64(ptrue, ptr, svreinterpret_s64_s32(vi)); +} + +static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { + return svld1_gather_s32index_f32(ptrue, ptr, vi2); +} + +// Operations for DFT + +static INLINE vdouble vposneg_vd_vd(vdouble d) { + return svneg_f64_m(d, svdupq_n_b64(0, 1), d); +} + +static INLINE vdouble vnegpos_vd_vd(vdouble d) { + return svneg_f64_m(d, svdupq_n_b64(1, 0), d); +} + +static INLINE vfloat vposneg_vf_vf(vfloat d) { + return svneg_f32_m(d, svdupq_n_b32(0, 1, 0, 1), d); +} + +static INLINE vfloat vnegpos_vf_vf(vfloat d) { + return svneg_f32_m(d, svdupq_n_b32(1, 0, 1, 0), d); +} + +static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return vadd_vd_vd_vd(x, vnegpos_vd_vd(y)); } +static INLINE vfloat vsubadd_vf_vf_vf(vfloat d0, vfloat d1) { return vadd_vf_vf_vf(d0, vnegpos_vf_vf(d1)); } +static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vfma_vd_vd_vd_vd(x, y, vnegpos_vd_vd(z)); } 
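+// Worked illustration (hypothetical lane values, not from the original
+// source): vnegpos negates the even-numbered lanes, so for
+// x = {x0, x1, x2, x3} and y = {y0, y1, y2, y3},
+// vsubadd(x, y)      = {x0 - y0, x1 + y1, x2 - y2, x3 + y3}, and
+// vmlsubadd(x, y, z) = {x0*y0 - z0, x1*y1 + z1, x2*y2 - z2, x3*y3 + z3},
+// the latter using one fused multiply-add per lane.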
+static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vfma_vf_vf_vf_vf(x, y, vnegpos_vf_vf(z)); } + +// + +static INLINE vdouble vrev21_vd_vd(vdouble x) { return svzip1_f64(svuzp2_f64(x, x), svuzp1_f64(x, x)); } + +static INLINE vdouble vreva2_vd_vd(vdouble vd) { + svint64_t x = svindex_s64((VECTLENDP-1), -1); + x = svzip1_s64(svuzp2_s64(x, x), svuzp1_s64(x, x)); + return svtbl_f64(vd, svreinterpret_u64_s64(x)); +} + +static INLINE vfloat vreva2_vf_vf(vfloat vf) { + svint32_t x = svindex_s32((VECTLENSP-1), -1); + x = svzip1_s32(svuzp2_s32(x, x), svuzp1_s32(x, x)); + return svtbl_f32(vf, svreinterpret_u32_s32(x)); +} + +// + +static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { + svst1_scatter_u64index_f64(ptrue, ptr + offset*2, svzip1_u64(svindex_u64(0, step*2), svindex_u64(1, step*2)), v); +} + +static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { + svst1_scatter_u32index_f32(ptrue, ptr + offset*2, svzip1_u32(svindex_u32(0, step*2), svindex_u32(1, step*2)), v); +} + +static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { vstoreu_v_p_vd(ptr, v); } +static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { vstore_v_p_vd(ptr, v); } +static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { vstoreu_v_p_vf(ptr, v); } +static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { vstore_v_p_vf(ptr, v); } +static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vscatter2_v_p_i_i_vd(ptr, offset, step, v); } +static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); } + +// These functions are for debugging +static double vcast_d_vd(vdouble v) { + double a[svcntd()]; + vstoreu_v_p_vd(a, v); + return a[0]; +} + +static float vcast_f_vf(vfloat v) { + float a[svcntw()]; + vstoreu_v_p_vf(a, v); + return a[0]; +} + +static int vcast_i_vi(vint v) { + int a[svcntw()]; + vstoreu_v_p_vi(a, v); + return a[0]; +} + +static int vcast_i_vi2(vint2 v) { + int a[svcntw()]; + vstoreu_v_p_vi2(a, v); + return a[0]; +} + +// + +static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { + return vm2setxy_vm2_vm_vm(svreinterpret_s32_u64(svtrn1_u64(svreinterpret_u64_s32(vm2getx_vm_vm2(v)), svreinterpret_u64_s32(vm2gety_vm_vm2(v)))), + svreinterpret_s32_u64(svtrn2_u64(svreinterpret_u64_s32(vm2getx_vm_vm2(v)), svreinterpret_u64_s32(vm2gety_vm_vm2(v))))); +} + +static INLINE vmask2 vuninterleave_vm2_vm2(vmask2 v) { + return vm2setxy_vm2_vm_vm(svreinterpret_s32_u64(svtrn1_u64(svreinterpret_u64_s32(vm2getx_vm_vm2(v)), svreinterpret_u64_s32(vm2gety_vm_vm2(v)))), + svreinterpret_s32_u64(svtrn2_u64(svreinterpret_u64_s32(vm2getx_vm_vm2(v)), svreinterpret_u64_s32(vm2gety_vm_vm2(v))))); +} + +static INLINE vint vuninterleave_vi_vi(vint v) { + return svreinterpret_s32_u64(svuzp1_u64(svtrn1_u64(svreinterpret_u64_s32(v), svreinterpret_u64_s32(v)), + svtrn2_u64(svreinterpret_u64_s32(v), svreinterpret_u64_s32(v)))); +} + +static INLINE vdouble vinterleave_vd_vd(vdouble vd) { + return svtrn1_f64(svzip1_f64(vd, vd), svzip2_f64(vd, vd)); +} + +static INLINE vdouble vuninterleave_vd_vd(vdouble vd) { + return svuzp1_f64(svtrn1_f64(vd, vd), svtrn2_f64(vd, vd)); +} + +static INLINE vmask vinterleave_vm_vm(vmask vm) { + return svreinterpret_s32_u64(svtrn1_u64(svzip1_u64(svreinterpret_u64_s32(vm), svreinterpret_u64_s32(vm)), + svzip2_u64(svreinterpret_u64_s32(vm), svreinterpret_u64_s32(vm)))); +} +static INLINE vmask vuninterleave_vm_vm(vmask vm) { + 
return svreinterpret_s32_u64(svuzp1_u64(svtrn1_u64(svreinterpret_u64_s32(vm), svreinterpret_u64_s32(vm)), + svtrn2_u64(svreinterpret_u64_s32(vm), svreinterpret_u64_s32(vm)))); +} + +static vmask2 vloadu_vm2_p(void *p) { + vmask2 vm2; + memcpy(&vm2, p, VECTLENDP * 16); + return vm2; +} + +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quadx vargquad; + +static INLINE vmask2 vcast_vm2_aq(vargquad aq) { + return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); +} + +static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { + vm2 = vuninterleave_vm2_vm2(vm2); + vargquad aq; + memcpy(&aq, &vm2, VECTLENDP * 16); + return aq; +} +#endif // #if !defined(SLEEF_GENHEADER) + +static INLINE int vtestallzeros_i_vo64(vopmask g) { + return svcntp_b64(svptrue_b64(), g) == 0; +} + +static INLINE vmask vsel_vm_vo64_vm_vm(vopmask o, vmask x, vmask y) { + return svreinterpret_s32_s64(svsel_s64(o, svreinterpret_s64_s32(x), svreinterpret_s64_s32(y))); +} + +static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) { + return svreinterpret_s32_s64( + svsub_s64_x(ptrue, svreinterpret_s64_s32(x), + svreinterpret_s64_s32(y))); +} + +static INLINE vmask vneg64_vm_vm(vmask x) { + return svreinterpret_s32_s64(svneg_s64_x(ptrue, svreinterpret_s64_s32(x))); +} + +static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { + return svcmpgt_s64(ptrue, svreinterpret_s64_s32(x), svreinterpret_s64_s32(y)); +} + +#define vsll64_vm_vm_i(x, c) svreinterpret_s32_u64(svlsl_n_u64_x(ptrue, svreinterpret_u64_s32(x), c)) +//@#define vsll64_vm_vm_i(x, c) svreinterpret_s32_u64(svlsl_n_u64_x(ptrue, svreinterpret_u64_s32(x), c)) +#define vsrl64_vm_vm_i(x, c) svreinterpret_s32_u64(svlsr_n_u64_x(ptrue, svreinterpret_u64_s32(x), c)) +//@#define vsrl64_vm_vm_i(x, c) svreinterpret_s32_u64(svlsr_n_u64_x(ptrue, svreinterpret_u64_s32(x), c)) + +static INLINE vmask vcast_vm_vi(vint vi) { return svreinterpret_s32_s64(svextw_s64_z(ptrue, svreinterpret_s64_s32(vi))); } +static INLINE vint vcast_vi_vm(vmask vm) { return vand_vm_vm_vm(vm, vcast_vm_i_i(0, 0xffffffff)); } diff --git a/src/memory.cpp b/src/memory.cpp index 694fb09f..da3b601a 100644 --- a/src/memory.cpp +++ b/src/memory.cpp @@ -53,6 +53,8 @@ NSIMD_DLLEXPORT void *nsimd_aligned_alloc(nsimd_nat n) { #endif } +// ---------------------------------------------------------------------------- + NSIMD_DLLEXPORT void nsimd_aligned_free(void *ptr) { #ifdef NSIMD_IS_MSVC _aligned_free(ptr); @@ -63,16 +65,3 @@ NSIMD_DLLEXPORT void nsimd_aligned_free(void *ptr) { } // extern "C" -// ---------------------------------------------------------------------------- - -namespace nsimd { - -NSIMD_DLLEXPORT void *aligned_alloc(nsimd_nat n) { - return nsimd_aligned_alloc(n); -} - -NSIMD_DLLEXPORT void aligned_free(void *ptr) { - nsimd_aligned_free(ptr); -} - -} // namespace nsimd diff --git a/src/misc.h b/src/misc.h new file mode 100644 index 00000000..c13765cd --- /dev/null +++ b/src/misc.h @@ -0,0 +1,366 @@ +// Copyright Naoki Shibata and contributors 2010 - 2020. +// Distributed under the Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+//
+
+#ifndef __MISC_H__
+#define __MISC_H__
+
+#if !defined(SLEEF_GENHEADER)
+#include <stdint.h> /* INT64_C */
+#include <limits.h>
+#endif
+
+#ifndef M_PI
+#define M_PI 3.141592653589793238462643383279502884
+#endif
+
+#ifndef M_PIl
+#define M_PIl 3.141592653589793238462643383279502884L
+#endif
+
+#ifndef M_1_PI
+#define M_1_PI 0.318309886183790671537767526745028724
+#endif
+
+#ifndef M_1_PIl
+#define M_1_PIl 0.318309886183790671537767526745028724L
+#endif
+
+#ifndef M_2_PI
+#define M_2_PI 0.636619772367581343075535053490057448
+#endif
+
+#ifndef M_2_PIl
+#define M_2_PIl 0.636619772367581343075535053490057448L
+#endif
+
+#ifndef SLEEF_FP_ILOGB0
+#define SLEEF_FP_ILOGB0 ((int)-2147483648)
+#endif
+
+#ifndef SLEEF_FP_ILOGBNAN
+#define SLEEF_FP_ILOGBNAN ((int)2147483647)
+#endif
+
+#define SLEEF_SNAN (((union { long long int i; double d; }) { .i = INT64_C(0x7ff0000000000001) }).d)
+#define SLEEF_SNANf (((union { long int i; float f; }) { .i = 0xff800001 }).f)
+
+
+//
+
+/*
+  PI_A to PI_D are constants that satisfy the following two conditions.
+
+  * For PI_A, PI_B and PI_C, the last 28 bits are zero.
+  * PI_A + PI_B + PI_C + PI_D is as close to pi as possible.
+
+  The argument of a trig function is multiplied by 1/pi, and the
+  integral part is divided into two parts, each of which has at most
+  28 bits. So, the maximum argument that could be correctly reduced
+  should be 2^(28*2-1) pi = 1.1e+17. However, due to the internal
+  double-precision calculation, the actual maximum argument that can
+  be correctly reduced is around 2^47.
+ */
+
+#define PI_A 3.1415926218032836914
+#define PI_B 3.1786509424591713469e-08
+#define PI_C 1.2246467864107188502e-16
+#define PI_D 1.2736634327021899816e-24
+#define TRIGRANGEMAX 1e+14
+
+/*
+  PI_A2 and PI_B2 are constants that satisfy the following two conditions.
+
+  * The last 3 bits of PI_A2 are zero.
+  * PI_A2 + PI_B2 is as close to pi as possible.
+
+  The argument of a trig function is multiplied by 1/pi, and the
+  integral part is multiplied by PI_A2. So, the maximum argument that
+  could be correctly reduced should be 2^(3-1) pi = 12.6. By testing,
+  we confirmed that it correctly reduces the argument up to around 15.
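+
+  For example, with q = rint(x * (1/pi)), the reduction is computed as
+  (x - q * PI_A2) - q * PI_B2. Because the last 3 bits of PI_A2 are
+  zero, the product q * PI_A2 is exact as long as q fits in 3 bits, so
+  the first subtraction loses no information, and PI_B2 then supplies
+  the lower bits of pi that were truncated from PI_A2.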
+ */ + +#define PI_A2 3.141592653589793116 +#define PI_B2 1.2246467991473532072e-16 +#define TRIGRANGEMAX2 15 + +#define M_2_PI_H 0.63661977236758138243 +#define M_2_PI_L -3.9357353350364971764e-17 + +#define SQRT_DBL_MAX 1.3407807929942596355e+154 + +#define TRIGRANGEMAX3 1e+9 + +#define M_4_PI 1.273239544735162542821171882678754627704620361328125 + +#define L2U .69314718055966295651160180568695068359375 +#define L2L .28235290563031577122588448175013436025525412068e-12 +#define R_LN2 1.442695040888963407359924681001892137426645954152985934135449406931 + +#define L10U 0.30102999566383914498 // log 2 / log 10 +#define L10L 1.4205023227266099418e-13 +#define LOG10_2 3.3219280948873623478703194294893901758648313930 + +#define L10Uf 0.3010253906f +#define L10Lf 4.605038981e-06f + +// + +#define PI_Af 3.140625f +#define PI_Bf 0.0009670257568359375f +#define PI_Cf 6.2771141529083251953e-07f +#define PI_Df 1.2154201256553420762e-10f +#define TRIGRANGEMAXf 39000 + +#define PI_A2f 3.1414794921875f +#define PI_B2f 0.00011315941810607910156f +#define PI_C2f 1.9841872589410058936e-09f +#define TRIGRANGEMAX2f 125.0f + +#define TRIGRANGEMAX4f 8e+6f + +#define SQRT_FLT_MAX 18446743523953729536.0 + +#define L2Uf 0.693145751953125f +#define L2Lf 1.428606765330187045e-06f + +#define R_LN2f 1.442695040888963407359924681001892137426645954152985934135449406931f +#define M_PIf ((float)M_PI) + +// + +#ifndef MIN +#define MIN(x, y) ((x) < (y) ? (x) : (y)) +#endif + +#ifndef MAX +#define MAX(x, y) ((x) > (y) ? (x) : (y)) +#endif + +#ifndef ABS +#define ABS(x) ((x) < 0 ? -(x) : (x)) +#endif + +#define stringify(s) stringify_(s) +#define stringify_(s) #s + +#if !defined(SLEEF_GENHEADER) +typedef long double longdouble; +#endif + +#if !defined(Sleef_double2_DEFINED) && !defined(SLEEF_GENHEADER) +#define Sleef_double2_DEFINED +typedef struct { + double x, y; +} Sleef_double2; +#endif + +#if !defined(Sleef_float2_DEFINED) && !defined(SLEEF_GENHEADER) +#define Sleef_float2_DEFINED +typedef struct { + float x, y; +} Sleef_float2; +#endif + +#if !defined(Sleef_longdouble2_DEFINED) && !defined(SLEEF_GENHEADER) +#define Sleef_longdouble2_DEFINED +typedef struct { + long double x, y; +} Sleef_longdouble2; +#endif + +#if !defined(Sleef_quad_DEFINED) && !defined(SLEEF_GENHEADER) +#define Sleef_quad_DEFINED +#if defined(ENABLEFLOAT128) +typedef __float128 Sleef_quad; +#else +typedef struct { double x, y; } Sleef_quad; +#endif +#endif + +#if !defined(Sleef_quad1_DEFINED) && !defined(SLEEF_GENHEADER) +#define Sleef_quad1_DEFINED +typedef union { + struct { + Sleef_quad x; + }; + Sleef_quad s[1]; +} Sleef_quad1; +#endif + +#if !defined(Sleef_quad2_DEFINED) && !defined(SLEEF_GENHEADER) +#define Sleef_quad2_DEFINED +typedef union { + struct { + Sleef_quad x, y; + }; + Sleef_quad s[2]; +} Sleef_quad2; +#endif + +#if !defined(Sleef_quad4_DEFINED) && !defined(SLEEF_GENHEADER) +#define Sleef_quad4_DEFINED +typedef union { + struct { + Sleef_quad x, y, z, w; + }; + Sleef_quad s[4]; +} Sleef_quad4; +#endif + +#if !defined(Sleef_quad8_DEFINED) && !defined(SLEEF_GENHEADER) +#define Sleef_quad8_DEFINED +typedef union { + Sleef_quad s[8]; +} Sleef_quad8; +#endif + +#if defined(__ARM_FEATURE_SVE) && !defined(Sleef_quadx_DEFINED) && !defined(SLEEF_GENHEADER) +#define Sleef_quadx_DEFINED +typedef union { + Sleef_quad s[32]; +} Sleef_quadx; +#endif + +// + +#if (defined (__GNUC__) || defined (__clang__) || defined(__INTEL_COMPILER)) && !defined(_MSC_VER) + +#define LIKELY(condition) __builtin_expect(!!(condition), 1) +#define UNLIKELY(condition) 
__builtin_expect(!!(condition), 0)
+#define RESTRICT __restrict__
+
+#ifndef __arm__
+#define ALIGNED(x) __attribute__((aligned(x)))
+#else
+#define ALIGNED(x)
+#endif
+
+#if defined(SLEEF_GENHEADER)
+
+#define INLINE SLEEF_ALWAYS_INLINE
+#define EXPORT SLEEF_INLINE
+#define CONST SLEEF_CONST
+#define NOEXPORT
+
+#else // #if defined(SLEEF_GENHEADER)
+
+#ifndef __INTEL_COMPILER
+#define CONST const
+#else
+#define CONST
+#endif
+#define INLINE __attribute__((always_inline))
+
+#if defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__)
+#ifndef SLEEF_STATIC_LIBS
+#define EXPORT __stdcall __declspec(dllexport)
+#define NOEXPORT
+#else // #ifndef SLEEF_STATIC_LIBS
+#define EXPORT
+#define NOEXPORT
+#endif // #ifndef SLEEF_STATIC_LIBS
+#else // #if defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__)
+#define EXPORT __attribute__((visibility("default")))
+#define NOEXPORT __attribute__ ((visibility ("hidden")))
+#endif // #if defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__)
+
+#endif // #if defined(SLEEF_GENHEADER)
+
+#define SLEEF_NAN __builtin_nan("")
+#define SLEEF_NANf __builtin_nanf("")
+#define SLEEF_NANl __builtin_nanl("")
+#define SLEEF_INFINITY __builtin_inf()
+#define SLEEF_INFINITYf __builtin_inff()
+#define SLEEF_INFINITYl __builtin_infl()
+
+#if defined(__INTEL_COMPILER) || defined (__clang__)
+#define SLEEF_INFINITYq __builtin_inf()
+#define SLEEF_NANq __builtin_nan("")
+#else
+#define SLEEF_INFINITYq __builtin_infq()
+#define SLEEF_NANq (SLEEF_INFINITYq - SLEEF_INFINITYq)
+#endif
+
+#elif defined(_MSC_VER) // #if (defined (__GNUC__) || defined (__clang__) || defined(__INTEL_COMPILER)) && !defined(_MSC_VER)
+
+#define INLINE __forceinline
+#define CONST
+#define RESTRICT
+#define ALIGNED(x)
+#define LIKELY(condition) (condition)
+#define UNLIKELY(condition) (condition)
+
+#ifndef SLEEF_STATIC_LIBS
+#define EXPORT __declspec(dllexport)
+#define NOEXPORT
+#else
+#define EXPORT
+#define NOEXPORT
+#endif
+
+#if (defined(__GNUC__) || defined(__CLANG__)) && (defined(__i386__) || defined(__x86_64__)) && !defined(SLEEF_GENHEADER)
+#include <x86intrin.h>
+#endif
+
+#define SLEEF_INFINITY (1e+300 * 1e+300)
+#define SLEEF_NAN (SLEEF_INFINITY - SLEEF_INFINITY)
+#define SLEEF_INFINITYf ((float)SLEEF_INFINITY)
+#define SLEEF_NANf ((float)SLEEF_NAN)
+#define SLEEF_INFINITYl ((long double)SLEEF_INFINITY)
+#define SLEEF_NANl ((long double)SLEEF_NAN)
+
+#if (defined(_M_AMD64) || defined(_M_X64))
+#ifndef __SSE2__
+#define __SSE2__
+#define __SSE3__
+#define __SSE4_1__
+#endif
+#elif _M_IX86_FP == 2
+#ifndef __SSE2__
+#define __SSE2__
+#define __SSE3__
+#define __SSE4_1__
+#endif
+#elif _M_IX86_FP == 1
+#ifndef __SSE__
+#define __SSE__
+#endif
+#endif
+
+#endif // #elif defined(_MSC_VER) // #if (defined (__GNUC__) || defined (__clang__) || defined(__INTEL_COMPILER)) && !defined(_MSC_VER)
+
+#if !defined(__linux__)
+#define isinff(x) ((x) == SLEEF_INFINITYf || (x) == -SLEEF_INFINITYf)
+#define isinfl(x) ((x) == SLEEF_INFINITYl || (x) == -SLEEF_INFINITYl)
+#define isnanf(x) ((x) != (x))
+#define isnanl(x) ((x) != (x))
+#endif
+
+#endif // #ifndef __MISC_H__
+
+#ifdef ENABLE_AAVPCS
+#define VECTOR_CC __attribute__((aarch64_vector_pcs))
+#else
+#define VECTOR_CC
+#endif
+
+
+  /* NSIMD specific */
+  #ifndef NSIMD_SLEEF_MISC_H
+  #define NSIMD_SLEEF_MISC_H
+
+  #ifdef INLINE
+  #undef INLINE
+  #endif
+  #define INLINE inline
+
+  #define Sleef_rempitabdp nsimd_sleef_rempitab_f64
+  #define Sleef_rempitabsp nsimd_sleef_rempitab_f32
+
+  #endif
+
+ 
\ No newline at end of file
diff 
--git a/src/rempitab.c b/src/rempitab.c new file mode 100644 index 00000000..200d8ea4 --- /dev/null +++ b/src/rempitab.c @@ -0,0 +1,1090 @@ +// Copyright Naoki Shibata and contributors 2010 - 2020. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#include "misc.h" + +#if !defined(SLEEF_GENHEADER) +#define FUNCATR NOEXPORT ALIGNED(64) +#else +#define FUNCATR EXPORT ALIGNED(64) +#endif + +FUNCATR const double Sleef_rempitabdp[] = { + 0.15915494309189531785, 1.7916237278037667488e-17, 2.5454160968749269937e-33, 2.1132476107887107169e-49, + 0.03415494309189533173, 4.0384494702232122736e-18, 1.0046721413651383112e-33, 2.1132476107887107169e-49, + 0.03415494309189533173, 4.0384494702232122736e-18, 1.0046721413651383112e-33, 2.1132476107887107169e-49, + 0.0029049430918953351999, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.496415728504571394e-51, + 0.0029049430918953351999, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.496415728504571394e-51, + 0.0029049430918953351999, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.496415728504571394e-51, + 0.0029049430918953351999, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.496415728504571394e-51, + 0.00095181809189533563356, 1.3532164927539732229e-19, -6.4410794381603004826e-36, 1.7634898158762436344e-52, + 0.00095181809189533563356, 1.3532164927539732229e-19, -6.4410794381603004826e-36, 1.7634898158762436344e-52, + 0.00046353684189533574198, 2.6901432026846872871e-20, -4.2254836195018827479e-37, 9.301187206862134399e-54, + 0.00021939621689533574198, 2.6901432026846872871e-20, -4.2254836195018827479e-37, 9.301187206862134399e-54, + 9.7325904395335769087e-05, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, + 3.6290748145335769087e-05, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, + 5.7731700203357690874e-06, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, + 5.7731700203357690874e-06, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, + 5.7731700203357690874e-06, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, + 1.9584727547107690874e-06, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, + 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, + 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, + 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, + 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, + 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, + 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, + 2.1321799510573569745e-08, 1.5185066224124613304e-24, 2.6226236120327253511e-40, 2.6283399642369025999e-57, + 6.4206383167259151492e-09, -1.3585460269359374382e-25, -1.3244127270701094468e-41, -2.4695541513869446866e-57, + 6.4206383167259151492e-09, -1.3585460269359374382e-25, -1.3244127270701094468e-41, -2.4695541513869446866e-57, + 2.6953480182640010867e-09, -1.3585460269359374382e-25, -1.3244127270701094468e-41, -2.4695541513869446866e-57, + 8.3270286903304384868e-10, 
7.0940550444663151936e-26, 9.7147467687967058732e-42, 7.9392906424978921242e-59, + 8.3270286903304384868e-10, 7.0940550444663151936e-26, 9.7147467687967058732e-42, 7.9392906424978921242e-59, + 3.6704158172530459087e-10, 7.0940550444663151936e-26, 9.7147467687967058732e-42, 7.9392906424978921242e-59, + 1.3421093807143501366e-10, 1.9241762160098927996e-26, 3.9750282589222551507e-42, 7.9392906424978921242e-59, + 1.7795616244500218596e-11, -1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, + 1.7795616244500218596e-11, -1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, + 1.7795616244500218596e-11, -1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, + 3.2437010161333667893e-12, -1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, + 3.2437010161333667893e-12, -1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, + 3.2437010161333667893e-12, -1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, + 1.4247116125875099096e-12, 2.5861333686050385673e-28, 2.8971783383570358633e-44, -2.6168913164368963837e-61, + 5.1521691081458187359e-13, 5.6664945123924856962e-29, 6.5510079543732854985e-45, -2.6168913164368963837e-61, + 6.0469559928117805118e-14, 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, + 6.0469559928117805118e-14, 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, + 6.0469559928117805118e-14, 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, + 3.6261410673097965595e-15, -1.3304005198798645927e-31, -1.7578597149294783985e-47, 8.4432539107728104262e-64, + 3.6261410673097965595e-15, -1.3304005198798645927e-31, -1.7578597149294783985e-47, 8.4432539107728104262e-64, + 3.6261410673097965595e-15, -1.3304005198798645927e-31, -1.7578597149294783985e-47, 8.4432539107728104262e-64, + 3.6261410673097965595e-15, -1.3304005198798645927e-31, -1.7578597149294783985e-47, 8.4432539107728104262e-64, + 7.3427388509295482183e-17, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659578102e-65, + 7.3427388509295482183e-17, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659578102e-65, + 7.3427388509295482183e-17, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659578102e-65, + 7.3427388509295482183e-17, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659578102e-65, + 7.3427388509295482183e-17, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659578102e-65, + 7.3427388509295482183e-17, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659578102e-65, + 1.7916237278037667488e-17, 2.5454160968749269937e-33, 2.1132476107887107169e-49, 8.7154294504188129325e-66, + 1.7916237278037667488e-17, 2.5454160968749269937e-33, 2.1132476107887107169e-49, 8.7154294504188129325e-66, + 4.0384494702232122736e-18, 1.0046721413651383112e-33, 2.1132476107887107169e-49, 8.7154294504188129325e-66, + 4.0384494702232122736e-18, 1.0046721413651383112e-33, 2.1132476107887107169e-49, 8.7154294504188129325e-66, + 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, + 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, + 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, + 
1.3532164927539732229e-19, -6.4410794381603004826e-36, 1.7634898158762432635e-52, 3.5887057810247033998e-68, + 1.3532164927539732229e-19, -6.4410794381603004826e-36, 1.7634898158762432635e-52, 3.5887057810247033998e-68, + 2.6901432026846872871e-20, -4.2254836195018827479e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, + 2.6901432026846872871e-20, -4.2254836195018827479e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, + 2.6901432026846872871e-20, -4.2254836195018827479e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, + 1.3348904870778067446e-20, -4.2254836195018827479e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, + 6.5726412927436632287e-21, 1.0820844071023395684e-36, 1.7634898158762432635e-52, 3.5887057810247033998e-68, + 3.1845095037264626247e-21, 3.2976802257607573031e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, + 1.4904436092178623228e-21, -4.6390169687056261795e-38, -1.1392999419355048437e-54, -4.587677453735884283e-71, + 6.4341066196356198368e-22, -4.6390169687056261795e-38, -1.1392999419355048437e-54, -4.587677453735884283e-71, + 2.1989418833641172011e-22, 4.7649378378726728402e-38, 9.3011872068621332399e-54, 1.113250147552460308e-69, + 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, + 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, + 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, + 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, + 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, + 1.5185066224124613304e-24, 2.6226236120327253511e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, + 1.5185066224124613304e-24, 2.6226236120327253511e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, + 1.5185066224124613304e-24, 2.6226236120327253511e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, + 6.9132600985943383921e-25, 7.8591368887290111994e-41, 2.6283399642369020339e-57, 5.3358074162805516304e-73, + 2.7773570358292009361e-25, -1.3244127270701094468e-41, -2.4695541513869446866e-57, -3.2399200798614356002e-74, + 7.0940550444663151936e-26, 9.7147467687967058732e-42, 7.9392906424978921242e-59, 2.9745456030524896742e-75, + 7.0940550444663151936e-26, 9.7147467687967058732e-42, 7.9392906424978921242e-59, 2.9745456030524896742e-75, + 1.9241762160098927996e-26, 3.9750282589222551507e-42, 7.9392906424978921242e-59, 2.9745456030524896742e-75, + 1.9241762160098927996e-26, 3.9750282589222551507e-42, 7.9392906424978921242e-59, 2.9745456030524896742e-75, + 6.317065088957874881e-27, -3.2976062348358281152e-43, -2.6168913164368963837e-61, 3.7036201000008290615e-78, + 6.317065088957874881e-27, -3.2976062348358281152e-43, -2.6168913164368963837e-61, 3.7036201000008290615e-78, + 3.0858908211726098086e-27, 3.8770419025072344914e-43, 7.9392906424978921242e-59, 2.9745456030524896742e-75, + 1.4703036872799779898e-27, 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008290615e-78, + 6.625101203336619011e-28, 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008290615e-78, + 2.5861333686050385673e-28, 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008290615e-78, + 5.6664945123924856962e-29, 6.5510079543732854985e-45, -2.6168913164368963837e-61, 3.7036201000008290615e-78, + 5.6664945123924856962e-29, 
6.5510079543732854985e-45, -2.6168913164368963837e-61, 3.7036201000008290615e-78, + 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, + 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, + 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, + 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, + 3.0224035688960604996e-30, 2.451648649116083682e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, + 1.4446817584540368888e-30, 2.451648649116083682e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, + 6.5582085323302525856e-31, 7.0002556871006273225e-47, 1.0567786762735315635e-62, -6.1446417754639313137e-79, + 2.6139040062251944343e-31, -1.7578597149294783985e-47, 8.4432539107728090768e-64, 1.9517662449371102229e-79, + 6.4175174317266470186e-32, 4.3166913557804827486e-48, 8.4432539107728090768e-64, 1.9517662449371102229e-79, + 6.4175174317266470186e-32, 4.3166913557804827486e-48, 8.4432539107728090768e-64, 1.9517662449371102229e-79, + 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659569668e-65, -7.2335760163150273591e-81, + 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659569668e-65, -7.2335760163150273591e-81, + 2.5454160968749269937e-33, 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, + 2.5454160968749269937e-33, 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, + 2.5454160968749269937e-33, 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, + 1.0046721413651383112e-33, 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, + 2.3430016361024414106e-34, 4.0267819632970559834e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, + 2.3430016361024414106e-34, 4.0267819632970559834e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, + 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, 1.4185069655957361252e-83, + 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, 1.4185069655957361252e-83, + 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, 1.4185069655957361252e-83, + 1.7633044866680145008e-35, 2.8491136916798196016e-51, 4.0680767287898916022e-67, 1.4185069655957361252e-83, + 5.595982714259923599e-36, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085140685e-84, + 5.595982714259923599e-36, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085140685e-84, + 2.5867171761548675786e-36, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085140685e-84, + 1.0820844071023395684e-36, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085140685e-84, + 3.2976802257607573031e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280944778e-86, + 3.2976802257607573031e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280944778e-86, + 1.4168892644450972904e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280944778e-86, + 4.7649378378726728402e-38, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280944778e-86, + 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, + 6.2960434583523738135e-40, 2.6283399642369020339e-57, 
5.3358074162805516304e-73, 4.524218473063975309e-90, + 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, + 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, + 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, + 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, + 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, + 2.6226236120327253511e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, + 7.8591368887290111994e-41, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, + 7.8591368887290111994e-41, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, + 3.2673620808294506214e-41, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, + 9.7147467687967058732e-42, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, + 9.7147467687967058732e-42, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, + 3.9750282589222551507e-42, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, + 1.1051690039850297894e-42, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, + 1.1051690039850297894e-42, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, + 3.8770419025072344914e-43, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, + 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008285821e-78, 5.6554937751584084315e-94, + 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008285821e-78, 5.6554937751584084315e-94, + 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008285821e-78, 5.6554937751584084315e-94, + 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008285821e-78, 5.6554937751584084315e-94, + 6.5510079543732854985e-45, -2.6168913164368963837e-61, 3.7036201000008285821e-78, 5.6554937751584084315e-94, + 6.5510079543732854985e-45, -2.6168913164368963837e-61, 3.7036201000008285821e-78, 5.6554937751584084315e-94, + 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, + 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, + 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, + 2.451648649116083682e-46, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, + 2.451648649116083682e-46, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, + 7.0002556871006273225e-47, 1.0567786762735315635e-62, -6.1446417754639301152e-79, -1.5355611056488084652e-94, + 7.0002556871006273225e-47, 1.0567786762735315635e-62, -6.1446417754639301152e-79, -1.5355611056488084652e-94, + 2.6211979860855749482e-47, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, + 4.3166913557804827486e-48, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, + 4.3166913557804827486e-48, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, + 4.3166913557804827486e-48, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 
2.62202614552995759e-95, + 1.5797802926460750146e-48, 2.3660905534865399025e-64, -7.2335760163150273591e-81, 2.8738690232659205689e-99, + 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, + 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, + 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, + 4.0267819632970559834e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, 2.8738690232659205689e-99, + 4.0267819632970559834e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, 2.8738690232659205689e-99, + 4.0267819632970559834e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, 2.8738690232659205689e-99, + 1.8885701952232994665e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, 2.8738690232659205689e-99, + 8.1946431118642097069e-51, 1.5937536410989638719e-66, 1.459625439463388979e-82, 2.8738690232659205689e-99, + 2.8491136916798196016e-51, 4.0680767287898916022e-67, 1.4185069655957361252e-83, -7.8369062883735917115e-100, + 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142808004e-99, + 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142808004e-99, + 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142808004e-99, + 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142808004e-99, + 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, + 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, + 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, + 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, + 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, + 4.0809436324633147776e-54, -4.587677453735884283e-71, -2.8859500138942368532e-87, -5.6567402911297190423e-103, + 1.470821845263904967e-54, -4.587677453735884283e-71, -2.8859500138942368532e-87, -5.6567402911297190423e-103, + 1.6576095166419998917e-55, 2.6568658093254848067e-71, 5.1571087196495574384e-87, 3.2728487032630537605e-103, + 1.6576095166419998917e-55, 2.6568658093254848067e-71, 5.1571087196495574384e-87, 3.2728487032630537605e-103, + 1.6576095166419998917e-55, 2.6568658093254848067e-71, 5.1571087196495574384e-87, 3.2728487032630537605e-103, + 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, + 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, + 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, + 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, + 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, + 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, + 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, + 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 
5.554706987098633963e-107, + 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, + 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, + 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, + 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, + 3.9565608646667614317e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, + 1.9651959757511960854e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, + 9.6951353129341363331e-60, 7.6368645294831185015e-76, 1.0603435429602168369e-91, 1.0451839188820145747e-108, + 4.7167230906452229674e-60, 7.6368645294831185015e-76, 1.0603435429602168369e-91, 1.0451839188820145747e-108, + 2.2275169795007668372e-60, 2.1097166542226745549e-76, 4.4670685979800101779e-92, 1.0451839188820145747e-108, + 9.8291392392853877215e-61, -6.5385728340754726503e-77, -1.3520652573660833788e-93, -2.3220403312043059402e-109, + 3.6061239614242446325e-61, 7.2792968540756372162e-77, 1.3988851821689310822e-92, 1.0451839188820145747e-108, + 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, + 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, + 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, + 1.0567786762735315635e-62, -6.1446417754639301152e-79, -1.535561105648808199e-94, -1.9306041120023063932e-110, + 1.0567786762735315635e-62, -6.1446417754639301152e-79, -1.535561105648808199e-94, -1.9306041120023063932e-110, + 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 6.5314563001514358328e-112, + 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 6.5314563001514358328e-112, + 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 6.5314563001514358328e-112, + 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 6.5314563001514358328e-112, + 2.3660905534865399025e-64, -7.2335760163150273591e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, + 2.3660905534865399025e-64, -7.2335760163150273591e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, + 8.4679971416497210292e-65, -7.2335760163150273591e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, + 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, + 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, + 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, + 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, + 3.9676455775389135587e-66, 1.459625439463388979e-82, 2.8738690232659205689e-99, 1.8395411057335783574e-115, + 1.5937536410989638719e-66, 1.459625439463388979e-82, 2.8738690232659205689e-99, 1.8395411057335783574e-115, + 4.0680767287898916022e-67, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894110579e-116, + 4.0680767287898916022e-67, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894110579e-116, + 1.1007118082399544936e-67, 1.4185069655957361252e-83, -7.8369062883735917115e-100, 
-1.9081236411894110579e-116, + 1.1007118082399544936e-67, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894110579e-116, + 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142805974e-99, 1.8395411057335783574e-115, + 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142805974e-99, 1.8395411057335783574e-115, + 1.7341027056809927069e-68, 1.830931441234090934e-84, 1.3069928418846076386e-100, 3.1677600334418876704e-116, + 8.0680116800913756637e-69, -2.2809159455312046184e-85, -4.0748824503880445403e-101, -6.3915272253158644628e-117, + 3.4315039917320989315e-69, -2.2809159455312046184e-85, -4.0748824503880445403e-101, -6.3915272253158644628e-117, + 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, + 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, + 5.3368668650755071652e-70, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, + 2.4390495598509592076e-70, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, + 9.901409072386855505e-71, -2.8859500138942368532e-87, -5.6567402911297190423e-103, -4.6672632026740766185e-119, + 2.6568658093254848067e-71, 5.1571087196495574384e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, + 2.6568658093254848067e-71, 5.1571087196495574384e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, + 8.4572999356014273536e-72, 1.1355793528776598461e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, + 8.4572999356014273536e-72, 1.1355793528776598461e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, + 3.9294603961880721752e-72, 1.3019701118468578292e-88, -7.5747169634236195447e-105, -2.0152904854894729832e-121, + 1.6655406264813940833e-72, 1.3019701118468578292e-88, -7.5747169634236195447e-105, -2.0152904854894729832e-121, + 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598455046e-121, + 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598455046e-121, + 2.5059077041472040156e-73, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598455046e-121, + 1.0909578480805302081e-73, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598455046e-121, + 3.8348292004719330442e-74, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598455046e-121, + 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, + 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, + 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, + 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, + 7.6368645294831185015e-76, 1.0603435429602168369e-91, 1.0451839188820145747e-108, 4.2386081393205242443e-125, + 7.6368645294831185015e-76, 1.0603435429602168369e-91, 1.0451839188820145747e-108, 4.2386081393205242443e-125, + 2.1097166542226745549e-76, 4.4670685979800101779e-92, 1.0451839188820145747e-108, 4.2386081393205242443e-125, + 2.1097166542226745549e-76, 4.4670685979800101779e-92, 1.0451839188820145747e-108, 4.2386081393205242443e-125, + 7.2792968540756372162e-77, 1.3988851821689310822e-92, 1.0451839188820145747e-108, 4.2386081393205242443e-125, + 3.7036201000008285821e-78, 
5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251472933e-126, + 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251472933e-126, + 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251472933e-126, + 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251472933e-126, + 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251472933e-126, + 1.5445779612272179051e-78, 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251472933e-126, + 4.6505689184041232695e-79, 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251472933e-126, + 4.6505689184041232695e-79, 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251472933e-126, + 1.9517662449371099233e-79, 2.62202614552995759e-95, 6.5314563001514349095e-112, 9.9039323746573674262e-128, + 6.0236490820360325022e-80, -3.7424672147304925625e-96, -1.784871512364483542e-112, 6.7095375687163151728e-129, + 6.0236490820360325022e-80, -3.7424672147304925625e-96, -1.784871512364483542e-112, 6.7095375687163151728e-129, + 2.6501457402022643213e-80, 3.7482149527770239293e-96, 6.5314563001514349095e-112, 9.9039323746573674262e-128, + 9.6339406928538097998e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, + 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, + 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, + 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, + 1.459625439463388979e-82, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, + 1.459625439463388979e-82, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, + 1.459625439463388979e-82, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, + 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894107761e-116, -2.1796760241698337334e-132, + 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894107761e-116, -2.1796760241698337334e-132, + 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894107761e-116, -2.1796760241698337334e-132, + 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894107761e-116, -2.1796760241698337334e-132, + 5.9489775128085131541e-84, 1.0450891972142805974e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, + 1.830931441234090934e-84, 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247800778e-132, + 1.830931441234090934e-84, 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247800778e-132, + 8.0141992334048515034e-85, 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247800778e-132, + 2.8666416439368237283e-85, 1.6400545060233297363e-101, -4.6672632026740766185e-119, -3.755176715260116501e-136, + 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, + 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, + 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, + 2.9286284920280941206e-86, 2.1132026692048600853e-102, 
-4.6672632026740766185e-119, -3.755176715260116501e-136, + 1.3200167453193350837e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, + 5.1571087196495574384e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, -3.755176715260116501e-136, + 1.1355793528776598461e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, -3.755176715260116501e-136, + 1.1355793528776598461e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, -3.755176715260116501e-136, + 1.3019701118468578292e-88, -7.5747169634236195447e-105, -2.0152904854894725532e-121, -3.1562414818576682143e-137, + 1.3019701118468578292e-88, -7.5747169634236195447e-105, -2.0152904854894725532e-121, -3.1562414818576682143e-137, + 1.3019701118468578292e-88, -7.5747169634236195447e-105, -2.0152904854894725532e-121, -3.1562414818576682143e-137, + 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852683481e-137, + 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852683481e-137, + 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852683481e-137, + 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852683481e-137, + 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852683481e-137, + 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 6.8339049774534162772e-139, + 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 6.8339049774534162772e-139, + 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 6.8339049774534162772e-139, + 1.0603435429602168369e-91, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, + 1.0603435429602168369e-91, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, + 1.0603435429602168369e-91, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, + 4.4670685979800101779e-92, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, + 1.3988851821689310822e-92, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, + 1.3988851821689310822e-92, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, + 6.3183932821616130831e-93, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, + 2.4831640123977650651e-93, 1.9359195088038447797e-109, -4.8867691298577234423e-126, -2.0587960670007823264e-142, + 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251471293e-126, 1.2214168761472102282e-142, + 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251471293e-126, 1.2214168761472102282e-142, + 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251471293e-126, 1.2214168761472102282e-142, + 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251471293e-126, 1.2214168761472102282e-142, + 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251471293e-126, 1.2214168761472102282e-142, + 2.62202614552995759e-95, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868987041e-145, + 2.62202614552995759e-95, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868987041e-145, + 1.1238897120284541253e-95, 6.5314563001514349095e-112, 9.9039323746573674262e-128, 
-8.6629775332868987041e-145, + 3.7482149527770239293e-96, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868987041e-145, + 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, + 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, + 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, + 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, + 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, + 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, + 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, + 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, + 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, + 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, + 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, + 1.0450891972142805974e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, + 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247794521e-132, 8.5448727249069983612e-148, + 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247794521e-132, 8.5448727249069983612e-148, + 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247794521e-132, 8.5448727249069983612e-148, + 1.6400545060233297363e-101, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, + 1.6400545060233297363e-101, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, + 1.6400545060233297363e-101, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, + 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, + 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, + 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, + 3.2728487032630532648e-103, 5.2465720993401781599e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, + 3.2728487032630532648e-103, 5.2465720993401781599e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, + 3.2728487032630532648e-103, 5.2465720993401781599e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, + 1.0404514546648604359e-103, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435652883e-152, + 1.0404514546648604359e-103, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435652883e-152, + 4.8235214251531210473e-104, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435652883e-152, + 2.0330248644053793915e-104, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435652883e-152, + 6.3777658403150887343e-105, -2.0152904854894725532e-121, -3.156241481857667737e-137, -7.0684085473731388916e-153, + 6.3777658403150887343e-105, -2.0152904854894725532e-121, -3.156241481857667737e-137, 
-7.0684085473731388916e-153, + 2.88964513938041089e-105, 5.7298933442091639924e-121, -3.156241481857667737e-137, -7.0684085473731388916e-153, + 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852681095e-137, 2.4782675885631257398e-153, + 2.7355461367940366859e-106, -7.8994528064813712419e-123, -2.0037599452814940222e-138, 9.1598554579059548847e-155, + 2.7355461367940366859e-106, -7.8994528064813712419e-123, -2.0037599452814940222e-138, 9.1598554579059548847e-155, + 5.5547069870986327528e-107, 1.6304246661326865276e-122, 6.8339049774534147855e-139, 9.1598554579059548847e-155, + 5.5547069870986327528e-107, 1.6304246661326865276e-122, 6.8339049774534147855e-139, 9.1598554579059548847e-155, + 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, + 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, + 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, + 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, + 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, + 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, + 1.9359195088038447797e-109, -4.8867691298577234423e-126, -2.0587960670007819622e-142, -2.8326669474241479263e-158, + 1.9359195088038447797e-109, -4.8867691298577234423e-126, -2.0587960670007819622e-142, -2.8326669474241479263e-158, + 1.9359195088038447797e-109, -4.8867691298577234423e-126, -2.0587960670007819622e-142, -2.8326669474241479263e-158, + 8.7142954880180709975e-110, -4.8867691298577234423e-126, -2.0587960670007819622e-142, -2.8326669474241479263e-158, + 3.3918456880078814158e-110, 6.931443500908017045e-126, 1.1062055705591186799e-141, 1.1734404793201255869e-157, + 7.3062078800278780675e-111, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.0910098773220312367e-159, + 7.3062078800278780675e-111, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.0910098773220312367e-159, + 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868972816e-145, -1.5987060076657616072e-160, + 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868972816e-145, -1.5987060076657616072e-160, + 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868972816e-145, -1.5987060076657616072e-160, + 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868972816e-145, -1.5987060076657616072e-160, + 2.3732923938934761454e-112, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, + 2.9421044076449630171e-113, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, + 2.9421044076449630171e-113, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, + 2.9421044076449630171e-113, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, + 3.4325196623373878948e-114, 9.3892593260023063019e-130, 9.4702132359198537748e-146, 1.7950099192230045857e-161, + 3.4325196623373878948e-114, 9.3892593260023063019e-130, 9.4702132359198537748e-146, 1.7950099192230045857e-161, + 3.4325196623373878948e-114, 9.3892593260023063019e-130, 9.4702132359198537748e-146, 1.7950099192230045857e-161, + 1.8395411057335783574e-115, -7.8150389500644475446e-132, 
-3.9681466199873824165e-148, 2.9106774506606945839e-164, + 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.9106774506606945839e-164, + 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.9106774506606945839e-164, + 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.9106774506606945839e-164, + 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.9106774506606945839e-164, + 8.2436437080731844263e-116, 1.4726412753514008951e-131, -3.9681466199873824165e-148, 2.9106774506606945839e-164, + 3.1677600334418871069e-116, 3.4556869017247794521e-132, 8.544872724906996972e-148, 1.6802919634942429241e-163, + 6.2981819612623816536e-117, 6.3800543877747317218e-133, 7.2423563434801054878e-149, 1.1741471776254779927e-164, + 6.2981819612623816536e-117, 6.3800543877747317218e-133, 7.2423563434801054878e-149, 1.1741471776254779927e-164, + 6.2981819612623816536e-117, 6.3800543877747317218e-133, 7.2423563434801054878e-149, 1.1741471776254779927e-164, + 3.1257546646178208289e-117, -6.6414926959353515111e-134, -5.7828074707888119584e-150, -1.2825052715093464343e-165, + 1.5395410162955400644e-117, -6.6414926959353515111e-134, -5.7828074707888119584e-150, -1.2825052715093464343e-165, + 7.4643419213439950602e-118, 1.0969016447485317626e-133, -5.7828074707888119584e-150, -1.2825052715093464343e-165, + 3.4988078005382940294e-118, 2.1637618757749825688e-134, -8.9490928918944555247e-151, -1.9717385086233606481e-166, + 1.5160407401354430737e-118, 2.1637618757749825688e-134, -8.9490928918944555247e-151, -1.9717385086233606481e-166, + 5.2465720993401781599e-119, -3.755176715260116501e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, + 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, + 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, + 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, + 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, + 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, + 1.3475077173907800538e-120, -3.156241481857667737e-137, -7.0684085473731388916e-153, -3.3573283875161501977e-170, + 5.7298933442091639924e-121, -3.156241481857667737e-137, -7.0684085473731388916e-153, -3.3573283875161501977e-170, + 1.8573014293598452896e-121, 1.1431992269852681095e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, + 1.8573014293598452896e-121, 1.1431992269852681095e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, + 8.8915345064751572143e-122, 1.1431992269852681095e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, + 4.0507946129135104481e-122, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911825673e-172, + 1.6304246661326865276e-122, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911825673e-172, + 4.2023969274227456735e-123, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911825673e-172, + 4.2023969274227456735e-123, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911825673e-172, + 1.1769344939467164447e-123, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064369683e-172, + 1.1769344939467164447e-123, 
1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064369683e-172, + 4.2056888557770896953e-124, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064369683e-172, + 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, + 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, + 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, + 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, + 1.8749656131673758844e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, + 6.931443500908017045e-126, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, + 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.0910098773220302259e-159, 1.2381024895275844856e-174, + 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.0910098773220302259e-159, 1.2381024895275844856e-174, + 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.0910098773220302259e-159, 1.2381024895275844856e-174, + 2.8369889610228834887e-127, 4.0136364036021218058e-143, -1.0134099605688458828e-159, -2.5389576707476506925e-176, + 2.8369889610228834887e-127, 4.0136364036021218058e-143, -1.0134099605688458828e-159, -2.5389576707476506925e-176, + 9.9039323746573674262e-128, -8.6629775332868972816e-145, -1.5987060076657612913e-160, -2.5389576707476506925e-176, + 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 6.197724948400014906e-177, + 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 6.197724948400014906e-177, + 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 6.197724948400014906e-177, + 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 6.197724948400014906e-177, + 9.3892593260023063019e-130, 9.4702132359198537748e-146, 1.7950099192230045857e-161, -1.6991004655691155518e-177, + 9.3892593260023063019e-130, 9.4702132359198537748e-146, 1.7950099192230045857e-161, -1.6991004655691155518e-177, + 9.3892593260023063019e-130, 9.4702132359198537748e-146, 1.7950099192230045857e-161, -1.6991004655691155518e-177, + 2.175994780857201024e-130, 1.4618808551874518553e-146, 1.6802919634942426156e-163, 2.8330093736631818036e-179, + 2.175994780857201024e-130, 1.4618808551874518553e-146, 1.6802919634942426156e-163, 2.8330093736631818036e-179, + 3.7267864457092460442e-131, 4.6083930759590139305e-147, 1.6802919634942426156e-163, 2.8330093736631818036e-179, + 3.7267864457092460442e-131, 4.6083930759590139305e-147, 1.6802919634942426156e-163, 2.8330093736631818036e-179, + 3.7267864457092460442e-131, 4.6083930759590139305e-147, 1.6802919634942426156e-163, 2.8330093736631818036e-179, + 1.4726412753514008951e-131, -3.9681466199873824165e-148, 2.9106774506606941983e-164, 5.1948630316441296498e-180, + 3.4556869017247794521e-132, 8.544872724906996972e-148, 1.6802919634942426156e-163, 2.8330093736631818036e-179, + 3.4556869017247794521e-132, 8.544872724906996972e-148, 1.6802919634942426156e-163, 2.8330093736631818036e-179, + 6.3800543877747317218e-133, 7.2423563434801054878e-149, 1.1741471776254777999e-164, 1.3389912474795152755e-180, + 6.3800543877747317218e-133, 7.2423563434801054878e-149, 1.1741471776254777999e-164, 1.3389912474795152755e-180, + 6.3800543877747317218e-133, 
7.2423563434801054878e-149, 1.1741471776254777999e-164, 1.3389912474795152755e-180, + 2.8579525590905986764e-133, -5.7828074707888119584e-150, -1.2825052715093464343e-165, -1.0696067158221530218e-181, + 1.0969016447485317626e-133, -5.7828074707888119584e-150, -1.2825052715093464343e-165, -1.0696067158221530218e-181, + 2.1637618757749825688e-134, -8.9490928918944555247e-151, -1.9717385086233606481e-166, 1.3535321672928907047e-182, + 2.1637618757749825688e-134, -8.9490928918944555247e-151, -1.9717385086233606481e-166, 1.3535321672928907047e-182, + 2.1637618757749825688e-134, -8.9490928918944555247e-151, -1.9717385086233606481e-166, 1.3535321672928907047e-182, + 1.0631050543111905033e-134, 1.5490398016102376505e-150, 3.4549185946116918017e-166, 1.3535321672928907047e-182, + 5.1277664357929471499e-135, 3.2706525621039604902e-151, 7.4159004299416557678e-167, 1.3535321672928907047e-182, + 2.3761243821334675971e-135, 3.2706525621039604902e-151, 7.4159004299416557678e-167, 1.3535321672928907047e-182, + 1.0003033553037281263e-135, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, + 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, + 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, + 1.4041521353514076604e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, + 5.4426399358282049106e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, + 1.1431992269852681095e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, + 1.1431992269852681095e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, + 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328578981e-188, + 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328578981e-188, + 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328578981e-188, + 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328578981e-188, + 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, + 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, + 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, + 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, + 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, + 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, + 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, + 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, + 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, + 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, + 4.5016298192952031469e-142, -2.8326669474241479263e-158, 1.2381024895275844856e-174, -8.4789520282639751913e-191, + 
1.2214168761472102282e-142, 8.0910098773220302259e-159, 1.2381024895275844856e-174, -8.4789520282639751913e-191, + 1.2214168761472102282e-142, 8.0910098773220302259e-159, 1.2381024895275844856e-174, -8.4789520282639751913e-191, + 4.0136364036021218058e-143, -1.0134099605688458828e-159, -2.5389576707476506925e-176, -6.2404128071707654958e-193, + 4.0136364036021218058e-143, -1.0134099605688458828e-159, -2.5389576707476506925e-176, -6.2404128071707654958e-193, + 1.9635033141346264592e-143, -1.0134099605688458828e-159, -2.5389576707476506925e-176, -6.2404128071707654958e-193, + 9.3843676940087855824e-144, 1.2626949989038732076e-159, 2.2730883653953564668e-175, 2.7431118386590483722e-191, + 4.2590349703400483539e-144, 1.2464251916751375716e-160, 6.1977249484000140293e-177, 1.1294061984896458822e-192, + 1.6963686085056791706e-144, 1.2464251916751375716e-160, 6.1977249484000140293e-177, 1.1294061984896458822e-192, + 4.1503542758849472122e-145, -1.7614040799531193879e-161, -1.6991004655691153326e-177, -1.856794109153959173e-193, + 4.1503542758849472122e-145, -1.7614040799531193879e-161, -1.6991004655691153326e-177, -1.856794109153959173e-193, + 9.4702132359198537748e-146, 1.7950099192230045857e-161, -1.6991004655691153326e-177, -1.856794109153959173e-193, + 9.4702132359198537748e-146, 1.7950099192230045857e-161, -1.6991004655691153326e-177, -1.856794109153959173e-193, + 1.4618808551874518553e-146, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, + 1.4618808551874518553e-146, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, + 1.4618808551874518553e-146, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, + 4.6083930759590139305e-147, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, + 4.6083930759590139305e-147, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, + 2.105789206980137775e-147, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, + 8.544872724906996972e-148, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, + 2.2883630524598079723e-148, 2.9106774506606941983e-164, 5.1948630316441287936e-180, 9.6685396110091032843e-196, + 2.2883630524598079723e-148, 2.9106774506606941983e-164, 5.1948630316441287936e-180, 9.6685396110091032843e-196, + 7.2423563434801054878e-149, 1.1741471776254777999e-164, 1.3389912474795150614e-180, 1.1067843414450286726e-196, + 7.2423563434801054878e-149, 1.1741471776254777999e-164, 1.3389912474795150614e-180, 1.1067843414450286726e-196, + 3.3320377982006123631e-149, 3.0588204110786950436e-165, 3.7502330143836152136e-181, 3.6564932749519464998e-198, + 1.3768785255608653665e-149, 3.0588204110786950436e-165, 3.7502330143836152136e-181, 3.6564932749519464998e-198, + 3.9929888924099219388e-150, -1.9717385086233606481e-166, 1.3535321672928907047e-182, 3.1205762277848031878e-199, + 3.9929888924099219388e-150, -1.9717385086233606481e-166, 1.3535321672928907047e-182, 3.1205762277848031878e-199, + 1.5490398016102376505e-150, 3.4549185946116918017e-166, 1.3535321672928907047e-182, 3.1205762277848031878e-199, + 3.2706525621039604902e-151, 7.4159004299416557678e-167, 1.3535321672928907047e-182, 3.1205762277848031878e-199, + 3.2706525621039604902e-151, 7.4159004299416557678e-167, 1.3535321672928907047e-182, 3.1205762277848031878e-199, + 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, 
-1.4832196127821708615e-201, + 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, + 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, + 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, + 2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, 1.4980560800565462618e-202, + 2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, 1.4980560800565462618e-202, + 2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, 1.4980560800565462618e-202, + 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, + 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, + 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, + 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, + 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, + 1.7015147267057481414e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, + 1.7015147267057481414e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, + 1.7015147267057481414e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, + 7.6922213530572229852e-156, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, + 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, -3.2905064432040069127e-204, + 7.0002691755702864582e-157, 6.5928896280762691321e-173, 1.1586156901317304854e-188, -1.0100405885278530137e-205, + 7.0002691755702864582e-157, 6.5928896280762691321e-173, 1.1586156901317304854e-188, -1.0100405885278530137e-205, + 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, + 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, + 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, + 4.4508689228885539715e-158, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, + 8.0910098773220302259e-159, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, + 8.0910098773220302259e-159, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, + 8.0910098773220302259e-159, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, + 3.5387999583765925506e-159, 2.2730883653953564668e-175, 2.7431118386590483722e-191, -1.3321093418096261919e-207, + 1.2626949989038732076e-159, 2.2730883653953564668e-175, 2.7431118386590483722e-191, -1.3321093418096261919e-207, + 1.2464251916751375716e-160, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, + 1.2464251916751375716e-160, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, + 1.2464251916751375716e-160, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, + 1.2464251916751375716e-160, 
6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, + 5.3514239183991277695e-161, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, + 1.7950099192230045857e-161, -1.6991004655691153326e-177, -1.8567941091539589297e-193, -1.8074851186411640793e-209, + 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, + 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, + 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, + 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, + 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, + 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, + 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, + 2.9106774506606941983e-164, 5.1948630316441287936e-180, 9.6685396110091013832e-196, 1.7562785002189357559e-211, + 2.9106774506606941983e-164, 5.1948630316441287936e-180, 9.6685396110091013832e-196, 1.7562785002189357559e-211, + 2.9106774506606941983e-164, 5.1948630316441287936e-180, 9.6685396110091013832e-196, 1.7562785002189357559e-211, + 1.1741471776254777999e-164, 1.3389912474795150614e-180, 1.106784341445028435e-196, 3.3045982549756583552e-212, + 3.0588204110786950436e-165, 3.7502330143836152136e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, + 3.0588204110786950436e-165, 3.7502330143836152136e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, + 8.8815756978467430465e-166, 1.3403131492807310959e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, + 8.8815756978467430465e-166, 1.3403131492807310959e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, + 3.4549185946116918017e-166, 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, + 7.4159004299416557678e-167, 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, + 7.4159004299416557678e-167, 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, + 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, + 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, + 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, + 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, + 2.0862146470760309789e-168, -1.146150630053972131e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, + 2.0862146470760309789e-168, -1.146150630053972131e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, + 1.026320681600434562e-168, 1.2072867382105631402e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, + 4.9637369886263658882e-169, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, + 2.3140020749373754342e-169, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, + 9.8913461809288020723e-170, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, + 
3.2670088967063259373e-170, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, + 3.2670088967063259373e-170, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, + 1.6109245756507072713e-170, -6.2044048008378732802e-187, -5.4322544592823556944e-203, 4.2491789852161138683e-219, + 7.8288241512289757055e-171, 1.2181824638728806485e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, + 3.6886133485899290404e-171, 2.9887099189454666024e-187, 4.774153170641553462e-203, 4.2491789852161138683e-219, + 1.6185079472704052482e-171, 2.9887099189454666024e-187, 4.774153170641553462e-203, 4.2491789852161138683e-219, + 5.8345524661064358191e-172, 6.9043123899963188689e-188, -3.2905064432040069127e-204, -9.1795828160190082842e-224, + 6.5928896280762691321e-173, 1.1586156901317304854e-188, -1.0100405885278530137e-205, -9.1795828160190082842e-224, + 6.5928896280762691321e-173, 1.1586156901317304854e-188, -1.0100405885278530137e-205, -9.1795828160190082842e-224, + 6.5928896280762691321e-173, 1.1586156901317304854e-188, -1.0100405885278530137e-205, -9.1795828160190082842e-224, + 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, + 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, + 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, + 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, + 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, + 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, + 2.2730883653953564668e-175, 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, + 2.2730883653953564668e-175, 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, + 2.2730883653953564668e-175, 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, + 1.0095962991602958391e-175, -6.2404128071707654958e-193, 3.0593092910744445285e-209, 5.4622616159087170031e-225, + 3.7785026604276538491e-176, -6.2404128071707654958e-193, 3.0593092910744445285e-209, 5.4622616159087170031e-225, + 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, -5.3441928036578162463e-225, + 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, -5.3441928036578162463e-225, + 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, -5.3441928036578162463e-225, + 2.2493122414154495675e-177, 2.5268245888628466632e-193, 3.0593092910744445285e-209, 5.4622616159087170031e-225, + 2.7510588792316711745e-178, 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450500218e-227, + 2.7510588792316711745e-178, 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450500218e-227, + 2.7510588792316711745e-178, 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450500218e-227, + 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 9.9192633285681635836e-229, + 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 9.9192633285681635836e-229, + 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 
9.9192633285681635836e-229, + 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 9.9192633285681635836e-229, + 1.2906606599973359683e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 9.9192633285681635836e-229, + 5.1948630316441287936e-180, 9.6685396110091013832e-196, 1.7562785002189355449e-211, 1.6821693549018732055e-227, + 1.3389912474795150614e-180, 1.106784341445028435e-196, 3.3045982549756578275e-212, 6.2685154049107876715e-228, + 1.3389912474795150614e-180, 1.106784341445028435e-196, 3.3045982549756578275e-212, 6.2685154049107876715e-228, + 3.7502330143836152136e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, 2.5658818466966882188e-231, + 3.7502330143836152136e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, 2.5658818466966882188e-231, + 1.3403131492807310959e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, 2.5658818466966882188e-231, + 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, -1.0577661142165146927e-233, + 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, -1.0577661142165146927e-233, + 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, -1.0577661142165146927e-233, + 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, -1.0577661142165146927e-233, + 6.0043220944823941786e-183, 3.1205762277848031878e-199, -3.3569248349832580936e-217, -1.0577661142165146927e-233, + 2.2388223052591377446e-183, 3.1205762277848031878e-199, -3.3569248349832580936e-217, -1.0577661142165146927e-233, + 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, -5.1336618966962585332e-235, + 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, -5.1336618966962585332e-235, + 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, -5.1336618966962585332e-235, + 1.2072867382105631402e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, -5.1336618966962585332e-235, + 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, + 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, + 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, + 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, + 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, + 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, + 1.2181824638728806485e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, + 2.9887099189454666024e-187, 4.774153170641553462e-203, 4.2491789852161132393e-219, 7.4467067939231424594e-235, + 2.9887099189454666024e-187, 4.774153170641553462e-203, 4.2491789852161132393e-219, 7.4467067939231424594e-235, + 6.9043123899963188689e-188, -3.2905064432040069127e-204, -9.1795828160190063645e-224, -2.3569545504732004486e-239, + 6.9043123899963188689e-188, -3.2905064432040069127e-204, -9.1795828160190063645e-224, -2.3569545504732004486e-239, + 1.1586156901317304854e-188, -1.0100405885278530137e-205, -9.1795828160190063645e-224, -2.3569545504732004486e-239, + 1.1586156901317304854e-188, -1.0100405885278530137e-205, 
-9.1795828160190063645e-224, -2.3569545504732004486e-239, + 1.1586156901317304854e-188, -1.0100405885278530137e-205, -9.1795828160190063645e-224, -2.3569545504732004486e-239, + 4.4040360264865697732e-189, -1.0100405885278530137e-205, -9.1795828160190063645e-224, -2.3569545504732004486e-239, + 8.129755890712020335e-190, 9.8339840169166049336e-206, -9.1795828160190063645e-224, -2.3569545504732004486e-239, + 8.129755890712020335e-190, 9.8339840169166049336e-206, -9.1795828160190063645e-224, -2.3569545504732004486e-239, + 8.129755890712020335e-190, 9.8339840169166049336e-206, -9.1795828160190063645e-224, -2.3569545504732004486e-239, + 3.6409303439428119063e-190, -1.332109341809626019e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, + 1.3965175705582071936e-190, -1.332109341809626019e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, + 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, + 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, + 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, + 1.3403538552936701153e-191, 1.7826390804083638359e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, + 6.389748636109812983e-192, 2.2526486929936882202e-208, -5.3441928036578156465e-225, -7.741539335184153052e-241, + 2.8828536776963681193e-192, 2.2526486929936882202e-208, -5.3441928036578156465e-225, -7.741539335184153052e-241, + 1.1294061984896456875e-192, 2.2526486929936882202e-208, -5.3441928036578156465e-225, -7.741539335184153052e-241, + 2.5268245888628466632e-193, 3.0593092910744445285e-209, 5.4622616159087170031e-225, 4.2560351759808952526e-241, + 2.5268245888628466632e-193, 3.0593092910744445285e-209, 5.4622616159087170031e-225, 4.2560351759808952526e-241, + 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450490845e-227, 1.3186893776791012681e-242, + 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450490845e-227, 1.3186893776791012681e-242, + 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450490845e-227, 1.3186893776791012681e-242, + 6.1039071228393547627e-195, 1.7562785002189355449e-211, 1.6821693549018732055e-227, -8.7276385348052817035e-244, + 6.1039071228393547627e-195, 1.7562785002189355449e-211, 1.6821693549018732055e-227, -8.7276385348052817035e-244, + 6.1039071228393547627e-195, 1.7562785002189355449e-211, 1.6821693549018732055e-227, -8.7276385348052817035e-244, + 2.6792050150137250131e-195, 1.7562785002189355449e-211, 1.6821693549018732055e-227, -8.7276385348052817035e-244, + 9.6685396110091013832e-196, 1.7562785002189355449e-211, 1.6821693549018732055e-227, -8.7276385348052817035e-244, + 2.0416567491425607157e-177, 6.0959078275963141821e-193, 1.156336993964950812e-208, 2.7126166236326293347e-224, + 2.0416567491425607157e-177, 6.0959078275963141821e-193, 1.156336993964950812e-208, 2.7126166236326293347e-224, + 2.0416567491425607157e-177, 6.0959078275963141821e-193, 1.156336993964950812e-208, 2.7126166236326293347e-224, + 6.7450395650278649168e-179, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, + 6.7450395650278649168e-179, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, + 6.7450395650278649168e-179, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, + 6.7450395650278649168e-179, 
6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, + 6.7450395650278649168e-179, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, + 5.756447103644822603e-180, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, + 5.756447103644822603e-180, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, + 5.756447103644822603e-180, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, + 5.756447103644822603e-180, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, + 1.9005753194802080146e-180, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, + 1.9005753194802080146e-180, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, + 9.3660737343905436753e-181, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, + 4.5462340041847754398e-181, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, + 2.1363141390818913221e-181, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, + 9.3135420653044926323e-182, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, + 3.2887424025472810002e-182, 7.185309278132283136e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, + 2.7634257116867652192e-183, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749095611e-233, + 2.7634257116867652192e-183, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749095611e-233, + 2.7634257116867652192e-183, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749095611e-233, + 2.7634257116867652192e-183, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749095611e-233, + 8.806758170751374203e-184, 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749095611e-233, + 8.806758170751374203e-184, 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749095611e-233, + 4.0998834342223036605e-184, 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749095611e-233, + 1.7464460659577689118e-184, 2.612671019845610006e-200, 2.1334073625072069974e-216, -9.2331809177749095611e-233, + 5.697273818255015375e-185, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, + 5.697273818255015375e-185, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, + 2.755477107924346286e-185, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, + 1.2845787527590117414e-185, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, + 5.4912957517634446918e-186, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, + 1.8140498638501083305e-186, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, + 1.8140498638501083305e-186, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, + 8.9473839187177424013e-187, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, + 4.3508265588260719497e-187, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, + 2.0525478788802367239e-187, -1.6933341491052464293e-204, -4.3478137385944270631e-220, 
-2.3353910329236990725e-236, + 9.0340853890731911095e-188, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, + 3.288388689208603045e-188, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, + 4.1554033927630885323e-189, -9.8582956929636044137e-206, -1.4280619485269765742e-221, 1.2171222696290252021e-237, + 4.1554033927630885323e-189, -9.8582956929636044137e-206, -1.4280619485269765742e-221, 1.2171222696290252021e-237, + 4.1554033927630885323e-189, -9.8582956929636044137e-206, -1.4280619485269765742e-221, 1.2171222696290252021e-237, + 5.643429553477207926e-190, 1.0076094209231528444e-205, 7.8509991660024955813e-222, 1.2171222696290252021e-237, + 5.643429553477207926e-190, 1.0076094209231528444e-205, 7.8509991660024955813e-222, 1.2171222696290252021e-237, + 5.643429553477207926e-190, 1.0076094209231528444e-205, 7.8509991660024955813e-222, 1.2171222696290252021e-237, + 1.1546040067079994973e-190, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, + 1.1546040067079994973e-190, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, + 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436627876e-240, + 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436627876e-240, + 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436627876e-240, + 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436627876e-240, + 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436627876e-240, + 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436627876e-240, + 1.4863145223629928288e-192, -7.9038076992129241506e-209, -1.609965144193984205e-224, -1.8313007053436627876e-240, + 6.0959078275963141821e-193, 1.156336993964950812e-208, 2.7126166236326293347e-224, -1.8313007053436627876e-240, + 1.712289129579509076e-193, 1.8297811202182925249e-209, 1.1003018740995688645e-226, 5.827891678485165325e-243, + 1.712289129579509076e-193, 1.8297811202182925249e-209, 1.1003018740995688645e-226, 5.827891678485165325e-243, + 6.1638445507530779946e-194, -6.0361608463951204924e-210, 1.1003018740995688645e-226, 5.827891678485165325e-243, + 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, + 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, + 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, + 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, + 3.418509674495068119e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, + 1.7061586205822532442e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, + 8.499830936258458068e-196, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, + 4.218953301476420881e-196, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, + 2.0785144840854027628e-196, -1.9512340798794268979e-214, -3.6162764918921692779e-230, -2.8387319855193022476e-246, + 1.008295075389893466e-196, -1.9512340798794268979e-214, -3.6162764918921692779e-230, 
-2.8387319855193022476e-246, + 4.7318537104213881764e-197, -1.9512340798794268979e-214, -3.6162764918921692779e-230, -2.8387319855193022476e-246, + 2.0563051886826149345e-197, -1.9512340798794268979e-214, -3.6162764918921692779e-230, -2.8387319855193022476e-246, + 7.185309278132283136e-198, -1.9512340798794268979e-214, -3.6162764918921692779e-230, -2.8387319855193022476e-246, + 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, + 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, + 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, + 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, + 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749077733e-233, -1.4042876247421728101e-248, + 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749077733e-233, -1.4042876247421728101e-248, + 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749077733e-233, -1.4042876247421728101e-248, + 2.612671019845610006e-200, 2.1334073625072069974e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, + 2.612671019845610006e-200, 2.1334073625072069974e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, + 1.306250843215349634e-200, 2.1334073625072069974e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, + 6.5304075490021959302e-201, 6.8298960257742791824e-217, 6.8696910062179237095e-233, 3.8349029251851101018e-249, + 3.2643571074265457254e-201, -4.2219277387461470355e-218, -1.753154605289404553e-234, -7.5861268822635538093e-251, + 1.6313318866387202604e-201, -4.2219277387461470355e-218, -1.753154605289404553e-234, -7.5861268822635538093e-251, + 8.1481927624480752786e-202, -4.2219277387461470355e-218, -1.753154605289404553e-234, -7.5861268822635538093e-251, + 4.0656297104785107096e-202, 4.8431832608149701961e-218, 8.3111403472061145651e-234, 1.6001805286092554504e-249, + 2.0243481844937293316e-202, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, + 1.0037074215013384159e-202, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, + 4.9338704000514295811e-203, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, + 2.3822684925704522921e-203, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, + 1.1064675388299639308e-203, 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608782288e-251, + 4.6856706195971960852e-204, 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608782288e-251, + 1.4961682352459748279e-204, -8.0675475439086544798e-221, -3.6970842501441777651e-237, -5.7032870362481275794e-253, + 1.4961682352459748279e-204, -8.0675475439086544798e-221, -3.6970842501441777651e-237, -5.7032870362481275794e-253, + 6.9879263915816924805e-205, 9.6377473771091526132e-221, 1.5959741828948633012e-236, 2.7031904319843495713e-252, + 3.0010484111426663515e-205, 7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, + 1.0076094209231528444e-205, 7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, + 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, + 1.0889925813396166947e-207, 2.4325525462765697993e-223, 
-1.1429360314275701698e-239, 8.3218722366085688343e-256, + 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, + 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, + 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, + 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, + 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, + 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436625212e-240, -2.3341145329525059632e-256, + 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436625212e-240, -2.3341145329525059632e-256, + 1.156336993964950812e-208, 2.7126166236326293347e-224, -1.8313007053436625212e-240, -2.3341145329525059632e-256, + 1.8297811202182925249e-209, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.1174271110208206547e-259, + 1.8297811202182925249e-209, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.1174271110208206547e-259, + 1.8297811202182925249e-209, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.1174271110208206547e-259, + 6.1308251778939023781e-210, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.1174271110208206547e-259, + 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, + 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, + 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, + 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, + 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, + 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, + 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, + 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, + 2.3568521170701555846e-212, -7.7818310317651142243e-229, -3.0299000794643401155e-245, -2.8075477999879273582e-261, + 1.1686698881356804311e-212, 1.8601114328504743806e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, + 5.7457877366844311816e-213, 5.409641648369814791e-229, -3.0299000794643401155e-245, -2.8075477999879273582e-261, + 2.7753321643482446169e-213, -1.1860946916976500828e-229, 6.3146909508553973881e-246, 1.2573885592501532045e-261, + 1.290104378180150675e-213, 2.1117734783360818049e-229, 4.2928382696354204061e-245, -2.8075477999879273582e-261, + 5.4749048509610403382e-214, 4.6283939331921604413e-230, 6.3146909508553973881e-246, 1.2573885592501532045e-261, + 1.7618353855408067201e-214, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, + 1.7618353855408067201e-214, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, + 8.3356801918574821257e-215, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, + 3.6943433600821895879e-215, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, + 1.3736749441945438342e-215, 
-9.2331809177749077733e-233, -1.4042876247421726117e-248, -9.9505977179164858712e-265, + 2.1334073625072069974e-216, -9.2331809177749077733e-233, -1.4042876247421726117e-248, -9.9505977179164858712e-265, + 2.1334073625072069974e-216, -9.2331809177749077733e-233, -1.4042876247421726117e-248, -9.9505977179164858712e-265, + 2.1334073625072069974e-216, -9.2331809177749077733e-233, -1.4042876247421726117e-248, -9.9505977179164858712e-265, + 6.8298960257742791824e-217, 6.8696910062179237095e-233, 3.8349029251851101018e-249, -2.6436684620390282645e-267, + 6.8298960257742791824e-217, 6.8696910062179237095e-233, 3.8349029251851101018e-249, -2.6436684620390282645e-267, + 3.2038516259498326923e-217, -1.1817449557784924788e-233, -6.3454186796659920093e-250, -2.6436684620390282645e-267, + 1.3908294260376086421e-217, 2.8439730252197153919e-233, 3.8349029251851101018e-249, -2.6436684620390282645e-267, + 4.8431832608149701961e-218, 8.3111403472061145651e-234, 1.6001805286092554504e-249, -2.6436684620390282645e-267, + 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, + 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, + 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, + 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, + 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608780349e-251, 1.2329569415922591084e-267, + 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608780349e-251, 1.2329569415922591084e-267, + 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608780349e-251, 1.2329569415922591084e-267, + 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608780349e-251, 1.2329569415922591084e-267, + 9.6377473771091526132e-221, 1.5959741828948633012e-236, 2.7031904319843490867e-252, 2.638005906844372114e-268, + 7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, + 7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, + 7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, + 7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, + 2.318094503184431479e-222, -1.1429360314275701698e-239, 8.3218722366085688343e-256, -2.0046830753539155726e-272, + 2.318094503184431479e-222, -1.1429360314275701698e-239, 8.3218722366085688343e-256, -2.0046830753539155726e-272, + 9.3486833747991514629e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, -2.0046830753539155726e-272, + 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, -2.0046830753539155726e-272, + 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, -2.0046830753539155726e-272, + 7.0351983914592419146e-224, 7.766758903588374524e-240, 8.3218722366085688343e-256, -2.0046830753539155726e-272, + 7.0351983914592419146e-224, 7.766758903588374524e-240, 8.3218722366085688343e-256, -2.0046830753539155726e-272, + 2.7126166236326293347e-224, -1.8313007053436625212e-240, -2.3341145329525056675e-256, -2.0046830753539155726e-272, + 5.5132573971932232487e-225, 5.6821419688934674008e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, + 
5.5132573971932232487e-225, 5.6821419688934674008e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, + 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, + 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, + 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, + 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, + 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, + 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, + 2.560476225709334075e-227, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, + 2.560476225709334075e-227, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, + 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, + 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, + 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, + 1.8601114328504743806e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, + 5.409641648369814791e-229, -3.0299000794643401155e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, + 5.409641648369814791e-229, -3.0299000794643401155e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, + 2.1117734783360818049e-229, 4.2928382696354204061e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, + 4.6283939331921604413e-230, 6.3146909508553973881e-246, 1.2573885592501529789e-261, 3.0408903374280139822e-277, + 4.6283939331921604413e-230, 6.3146909508553973881e-246, 1.2573885592501529789e-261, 3.0408903374280139822e-277, + 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 8.7279092175580820317e-280, + 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 8.7279092175580820317e-280, + 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 8.7279092175580820317e-280, + 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 8.7279092175580820317e-280, + 2.4841276986611042098e-231, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, + 1.1958979447416775482e-231, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, + 5.5178306778196421733e-232, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, + 2.2972562930210755192e-232, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, + 6.8696910062179237095e-233, 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, + 6.8696910062179237095e-233, 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, + 2.8439730252197153919e-233, 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, + 8.3111403472061145651e-234, 1.6001805286092554504e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, + 8.3111403472061145651e-234, 1.6001805286092554504e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 
+ 3.2789928709583552854e-234, 4.8281933032132812475e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, + 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, + 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, + 1.3390069830350552605e-235, -6.026193929640082176e-252, -7.0535576022338457803e-268, -4.3807022524130141006e-284, + 1.3390069830350552605e-235, -6.026193929640082176e-252, -7.0535576022338457803e-268, -4.3807022524130141006e-284, + 1.3390069830350552605e-235, -6.026193929640082176e-252, -7.0535576022338457803e-268, -4.3807022524130141006e-284, + 5.5273393987134252385e-236, 1.1432574793608780349e-251, 1.2329569415922591084e-267, -4.3807022524130141006e-284, + 1.5959741828948633012e-236, 2.7031904319843490867e-252, 2.638005906844371576e-268, 6.3790946999826013345e-284, + 1.5959741828948633012e-236, 2.7031904319843490867e-252, 2.638005906844371576e-268, 6.3790946999826013345e-284, + 6.1313287894022281692e-237, 5.2084434157824127104e-253, 2.1511502957481757317e-269, 3.2670891426006739096e-285, + 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, -9.5347405022956042207e-287, + 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, -9.5347405022956042207e-287, + 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, -9.5347405022956042207e-287, + 6.0284645465737476297e-238, -2.4742181023285720738e-254, -1.2030990169203137715e-270, -9.5347405022956042207e-287, + 2.9570854717154947523e-238, 4.3456134301905148502e-254, 6.3684349745470443788e-270, -9.5347405022956042207e-287, + 1.4213959342863689955e-238, 9.3569766393097138822e-255, 2.5826679788133653036e-270, -9.5347405022956042207e-287, + 6.5355116557180594664e-239, 9.3569766393097138822e-255, 2.5826679788133653036e-270, -9.5347405022956042207e-287, + 2.6962878121452450746e-239, 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, + 7.766758903588374524e-240, 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, + 7.766758903588374524e-240, 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, + 2.9677290991223565342e-240, -2.3341145329525056675e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, + 5.6821419688934674008e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, + 5.6821419688934674008e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, + 5.6821419688934674008e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, + 2.6827483411022054912e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, + 1.1830515272065748694e-241, -3.117427111020820077e-259, -5.9718623963762788119e-275, 6.1155422068568954053e-291, + 4.3320312025875939195e-242, -3.117427111020820077e-259, -5.9718623963762788119e-275, 6.1155422068568954053e-291, + 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 6.1155422068568954053e-291, + 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 6.1155422068568954053e-291, + 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 6.1155422068568954053e-291, + 1.1413391350613183311e-243, -5.1586784110844895013e-260, -1.9524039360882352712e-276, 
-2.9779654517181717279e-292, + 1.1413391350613183311e-243, -5.1586784110844895013e-260, -1.9524039360882352712e-276, -2.9779654517181717279e-292, + 1.1413391350613183311e-243, -5.1586784110844895013e-260, -1.9524039360882352712e-276, -2.9779654517181717279e-292, + 5.5552006713333735927e-244, 7.8491179384773690214e-260, -1.9524039360882352712e-276, -2.9779654517181717279e-292, + 2.6261053316934700345e-244, 1.345219763696439399e-260, 1.6579848156414234801e-276, 1.0303712682997740506e-292, + 1.1615576618735179302e-244, 1.345219763696439399e-260, 1.6579848156414234801e-276, 1.0303712682997740506e-292, + 4.2928382696354204061e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, 2.8287088295287585094e-294, + 6.3146909508553973881e-246, 1.2573885592501529789e-261, 3.0408903374280139822e-277, 2.8287088295287585094e-294, + 6.3146909508553973881e-246, 1.2573885592501529789e-261, 3.0408903374280139822e-277, 2.8287088295287585094e-294, + 6.3146909508553973881e-246, 1.2573885592501529789e-261, 3.0408903374280139822e-277, 2.8287088295287585094e-294, + 1.7379794826680480784e-246, 2.4115446944063306384e-262, 2.202741251392177696e-278, 2.8287088295287585094e-294, + 1.7379794826680480784e-246, 2.4115446944063306384e-262, 2.202741251392177696e-278, 2.8287088295287585094e-294, + 5.9380161562121075096e-247, -1.2904053011746964278e-263, 8.7279092175580810531e-280, 8.8634899828990930877e-296, + 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150276549e-299, + 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150276549e-299, + 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150276549e-299, + 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150276549e-299, + 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150276549e-299, + 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, + 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, + 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, + 1.6001805286092554504e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, + 4.8281933032132812475e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, + 4.8281933032132812475e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, + 2.0347903074934629333e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, + 6.3808880963355377617e-251, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, + 6.3808880963355377617e-251, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, + 2.8891343516857640937e-251, 5.1095823452235464813e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, + 1.1432574793608780349e-251, 1.2329569415922591084e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, + 2.7031904319843490867e-252, 2.638005906844371576e-268, 6.3790946999826013345e-284, -2.7456019707854725967e-300, + 2.7031904319843490867e-252, 2.638005906844371576e-268, 6.3790946999826013345e-284, -2.7456019707854725967e-300, + 5.2084434157824127104e-253, 2.1511502957481757317e-269, 
3.2670891426006735363e-285, 2.4084160842482777461e-301, + 5.2084434157824127104e-253, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482777461e-301, + 5.2084434157824127104e-253, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482777461e-301, + 2.4805108027747776379e-253, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482777461e-301, + 1.1165444962709601017e-253, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482777461e-301, + 4.3456134301905148502e-254, 6.3684349745470443788e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, + 9.3569766393097138822e-255, 2.5826679788133653036e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, + 9.3569766393097138822e-255, 2.5826679788133653036e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, + 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 2.3458177946667328156e-304, + 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 2.3458177946667328156e-304, + 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 2.3458177946667328156e-304, + 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 2.3458177946667328156e-304, + 2.9938788518280315834e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 2.3458177946667328156e-304, + 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, + 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, + 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, + 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, + 1.6338236616337094706e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, + 8.0132469526175071002e-258, 2.8687869620228451614e-274, -1.9537812801257956865e-290, 1.0380272777574237546e-306, + 3.850752120757712373e-258, 2.8687869620228451614e-274, -1.9537812801257956865e-290, 1.0380272777574237546e-306, + 1.7695047048278150093e-258, 2.8687869620228451614e-274, -1.9537812801257956865e-290, 1.0380272777574237546e-306, + 7.2888099686286655858e-259, 5.581381609158630475e-275, 6.1155422068568946933e-291, 1.0380272777574237546e-306, + 2.0856914288039227544e-259, -1.9524039360882352712e-276, -2.9779654517181712829e-292, -3.000817432603284506e-308, + 2.0856914288039227544e-259, -1.9524039360882352712e-276, -2.9779654517181712829e-292, -3.000817432603284506e-308, + 7.8491179384773690214e-260, -1.9524039360882352712e-276, -2.9779654517181712829e-292, -3.000817432603284506e-308, + 1.345219763696439399e-260, 1.6579848156414234801e-276, 1.0303712682997738281e-292, 1.4493302844111182601e-308, + 1.345219763696439399e-260, 1.6579848156414234801e-276, 1.0303712682997738281e-292, 1.4493302844111182601e-308, + 1.345219763696439399e-260, 1.6579848156414234801e-276, 1.0303712682997738281e-292, 1.4493302844111182601e-308, + 5.3223249184882342185e-261, -1.472095602234059958e-277, 2.8287088295287585094e-294, -1.0874435234232647519e-310, + 1.2573885592501529789e-261, 3.0408903374280139822e-277, 2.8287088295287585094e-294, -1.0874435234232647519e-310, + 1.2573885592501529789e-261, 3.0408903374280139822e-277, 2.8287088295287585094e-294, -1.0874435234232647519e-310, + 2.4115446944063306384e-262, 
2.202741251392177696e-278, 2.8287088295287585094e-294, -1.0874435234232647519e-310, + 2.4115446944063306384e-262, 2.202741251392177696e-278, 2.8287088295287585094e-294, -1.0874435234232647519e-310, + 2.4115446944063306384e-262, 2.202741251392177696e-278, 2.8287088295287585094e-294, -1.0874435234232647519e-310, + 1.1412520821444306741e-262, -6.1787496089661820348e-279, -3.028042329852615431e-295, -2.182740474438892116e-311, + 5.0610577601348040988e-263, 7.9243314524777990283e-279, -3.028042329852615431e-295, -2.182740474438892116e-311, + 1.8853262294800541881e-263, 8.7279092175580810531e-280, 8.8634899828990930877e-296, -9.8167844904532653004e-314, + 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, + 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, + 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, + 9.8977243486757054781e-265, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, + 9.8977243486757054781e-265, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, + 4.9356438320276576408e-265, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, + 2.4546035737036337221e-265, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, + 1.2140834445416214873e-265, 1.8893435613692150014e-281, 3.0075895258731974416e-297, -9.8167844904532653004e-314, + 5.9382337996061564537e-266, 5.1208955146257653156e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, + 2.8369334767011265554e-266, 5.1208955146257653156e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, + 1.2862833152486119506e-266, 1.6777604898591683764e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, + 5.1095823452235464813e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, -2.5539572388808429997e-317, + 1.2329569415922591084e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, -2.5539572388808429997e-317, + 1.2329569415922591084e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, -2.5539572388808429997e-317, + 2.638005906844371576e-268, 6.3790946999826013345e-284, -2.7456019707854725967e-300, -2.5539572388808429997e-317, + 2.638005906844371576e-268, 6.3790946999826013345e-284, -2.7456019707854725967e-300, -2.5539572388808429997e-317, + 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482773317e-301, 5.7350888195772519812e-317, + 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482773317e-301, 5.7350888195772519812e-317, + 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482773317e-301, 5.7350888195772519812e-317, + 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482773317e-301, 5.7350888195772519812e-317, + 6.3684349745470443788e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, 3.6369654387311681856e-319, + 6.3684349745470443788e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, 3.6369654387311681856e-319, + 2.5826679788133653036e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, 3.6369654387311681856e-319, + 6.8978448094652555593e-271, 1.1480487920352081009e-286, 7.5257037990230704094e-303, 3.6369654387311681856e-319, + 6.8978448094652555593e-271, 1.1480487920352081009e-286, 7.5257037990230704094e-303, 3.6369654387311681856e-319, + 
2.1656360647981577662e-271, 9.7287370902823839435e-288, 1.6928061833779524157e-303, 3.6369654387311681856e-319, + 2.1656360647981577662e-271, 9.7287370902823839435e-288, 1.6928061833779524157e-303, 3.6369654387311681856e-319, + 9.825838786313830552e-272, 9.7287370902823839435e-288, 1.6928061833779524157e-303, 3.6369654387311681856e-319, + 3.9105778554799569972e-272, 9.7287370902823839435e-288, 1.6928061833779524157e-303, 3.6369654387311681856e-319, + 9.5294739006302120482e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322, + 9.5294739006302120482e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322, + 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322, + 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322, + 2.8687869620228451614e-274, -1.9537812801257956865e-290, 1.0380272777574237546e-306, 6.4228533959362050743e-323, +}; + +NOEXPORT ALIGNED(64) const float Sleef_rempitabsp[] = { + 0.159154892, 5.112411827e-08, 3.626141271e-15, -2.036222915e-22, + 0.03415493667, 6.420638243e-09, 7.342738037e-17, 8.135951656e-24, + 0.03415493667, 6.420638243e-09, 7.342738037e-17, 8.135951656e-24, + 0.002904943191, -9.861969574e-11, -9.839336547e-18, -1.790215892e-24, + 0.002904943191, -9.861969574e-11, -9.839336547e-18, -1.790215892e-24, + 0.002904943191, -9.861969574e-11, -9.839336547e-18, -1.790215892e-24, + 0.002904943191, -9.861969574e-11, -9.839336547e-18, -1.790215892e-24, + 0.0009518179577, 1.342109202e-10, 1.791623576e-17, 1.518506657e-24, + 0.0009518179577, 1.342109202e-10, 1.791623576e-17, 1.518506657e-24, + 0.0004635368241, 1.779561221e-11, 4.038449606e-18, -1.358546052e-25, + 0.0002193961991, 1.779561221e-11, 4.038449606e-18, -1.358546052e-25, + 9.73258866e-05, 1.779561221e-11, 4.038449606e-18, -1.358546052e-25, + 3.62907449e-05, 3.243700447e-12, 5.690024473e-19, 7.09405479e-26, + 5.773168596e-06, 1.424711477e-12, 1.3532163e-19, 1.92417627e-26, + 5.773168596e-06, 1.424711477e-12, 1.3532163e-19, 1.92417627e-26, + 5.773168596e-06, 1.424711477e-12, 1.3532163e-19, 1.92417627e-26, + 1.958472239e-06, 5.152167755e-13, 1.3532163e-19, 1.92417627e-26, + 5.112411827e-08, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, + 5.112411827e-08, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, + 5.112411827e-08, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, + 5.112411827e-08, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, + 5.112411827e-08, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, + 5.112411827e-08, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, + 2.132179588e-08, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, + 6.420638243e-09, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, + 6.420638243e-09, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, + 2.695347945e-09, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, + 8.327027956e-10, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, + 8.327027956e-10, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, + 3.670415083e-10, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, + 1.342109202e-10, 1.791623576e-17, 1.518506361e-24, 2.613904e-31, + 1.779561221e-11, 4.038449606e-18, -1.358545683e-25, -3.443243946e-32, + 1.779561221e-11, 4.038449606e-18, -1.358545683e-25, -3.443243946e-32, + 1.779561221e-11, 4.038449606e-18, -1.358545683e-25, -3.443243946e-32, + 3.243700447e-12, 5.690024473e-19, 7.094053557e-26, 1.487136711e-32, + 
3.243700447e-12, 5.690024473e-19, 7.094053557e-26, 1.487136711e-32, + 3.243700447e-12, 5.690024473e-19, 7.094053557e-26, 1.487136711e-32, + 1.424711477e-12, 1.3532163e-19, 1.924175961e-26, 2.545416018e-33, + 5.152167755e-13, 1.3532163e-19, 1.924175961e-26, 2.545416018e-33, + 6.046956013e-14, -2.036222915e-22, 6.177846108e-30, 1.082084378e-36, + 6.046956013e-14, -2.036222915e-22, 6.177846108e-30, 1.082084378e-36, + 6.046956013e-14, -2.036222915e-22, 6.177846108e-30, 1.082084378e-36, + 3.626141271e-15, -2.036222915e-22, 6.177846108e-30, 1.082084378e-36, + 3.626141271e-15, -2.036222915e-22, 6.177846108e-30, 1.082084378e-36, + 3.626141271e-15, -2.036222915e-22, 6.177846108e-30, 1.082084378e-36, + 3.626141271e-15, -2.036222915e-22, 6.177846108e-30, 1.082084378e-36, + 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 6.296048013e-40, + 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 6.296048013e-40, + 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 6.296048013e-40, + 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 6.296048013e-40, + 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 6.296048013e-40, + 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 6.296048013e-40, + 1.791623576e-17, 1.518506361e-24, 2.61390353e-31, 4.764937743e-38, + 1.791623576e-17, 1.518506361e-24, 2.61390353e-31, 4.764937743e-38, + 4.038449606e-18, -1.358545683e-25, -3.443243946e-32, 6.296048013e-40, + 4.038449606e-18, -1.358545683e-25, -3.443243946e-32, 6.296048013e-40, + 5.690024473e-19, 7.094053557e-26, 1.487136711e-32, 6.296048013e-40, + 5.690024473e-19, 7.094053557e-26, 1.487136711e-32, 6.296048013e-40, + 5.690024473e-19, 7.094053557e-26, 1.487136711e-32, 6.296048013e-40, + 1.3532163e-19, 1.924175961e-26, 2.545415467e-33, 6.296048013e-40, + 1.3532163e-19, 1.924175961e-26, 2.545415467e-33, 6.296048013e-40, + 2.690143217e-20, -1.452834402e-28, -6.441077673e-36, -1.764234767e-42, + 2.690143217e-20, -1.452834402e-28, -6.441077673e-36, -1.764234767e-42, + 2.690143217e-20, -1.452834402e-28, -6.441077673e-36, -1.764234767e-42, + 1.334890502e-20, -1.452834402e-28, -6.441077673e-36, -1.764234767e-42, + 6.572641438e-21, -1.452834402e-28, -6.441077673e-36, -1.764234767e-42, + 0.05874381959, 1.222115387e-08, 7.693612965e-16, 1.792054435e-22, + 0.02749382704, 4.77057327e-09, 7.693612965e-16, 1.792054435e-22, + 0.01186883077, 1.045283415e-09, 3.252721926e-16, 7.332633139e-23, + 0.00405633077, 1.045283415e-09, 3.252721926e-16, 7.332633139e-23, + 0.000150081818, -2.454155802e-12, 1.161414894e-20, 1.291319272e-27, + 0.000150081818, -2.454155802e-12, 1.161414894e-20, 1.291319272e-27, + 0.000150081818, -2.454155802e-12, 1.161414894e-20, 1.291319272e-27, + 0.000150081818, -2.454155802e-12, 1.161414894e-20, 1.291319272e-27, + 0.000150081818, -2.454155802e-12, 1.161414894e-20, 1.291319272e-27, + 2.801149822e-05, 4.821800945e-12, 8.789757674e-19, 1.208447639e-25, + 2.801149822e-05, 4.821800945e-12, 8.789757674e-19, 1.208447639e-25, + 2.801149822e-05, 4.821800945e-12, 8.789757674e-19, 1.208447639e-25, + 1.275271279e-05, 1.183823005e-12, 1.161414894e-20, 1.291319272e-27, + 5.12331826e-06, 1.183823005e-12, 1.161414894e-20, 1.291319272e-27, + 1.308621904e-06, 2.743283031e-13, 1.161414894e-20, 1.291319272e-27, + 1.308621904e-06, 2.743283031e-13, 1.161414894e-20, 1.291319272e-27, + 3.549478151e-07, 4.695462769e-14, 1.161414894e-20, 1.291319272e-27, + 3.549478151e-07, 4.695462769e-14, 1.161414894e-20, 1.291319272e-27, + 1.165292645e-07, 1.853292503e-14, 4.837885366e-21, 1.291319272e-27, + 1.165292645e-07, 1.853292503e-14, 
4.837885366e-21, 1.291319272e-27, + 5.69246339e-08, 4.322073705e-15, 1.449754789e-21, 7.962890365e-29, + 2.712231151e-08, 4.322073705e-15, 1.449754789e-21, 7.962890365e-29, + 1.222115387e-08, 7.693612965e-16, 1.792054182e-22, 2.91418027e-29, + 4.77057327e-09, 7.693612965e-16, 1.792054182e-22, 2.91418027e-29, + 1.045283415e-09, 3.252721926e-16, 7.332632508e-23, 3.898253736e-30, + 1.045283415e-09, 3.252721926e-16, 7.332632508e-23, 3.898253736e-30, + 1.139611461e-10, 1.996093359e-17, 5.344349223e-25, 1.511644828e-31, + 1.139611461e-10, 1.996093359e-17, 5.344349223e-25, 1.511644828e-31, + 1.139611461e-10, 1.996093359e-17, 5.344349223e-25, 1.511644828e-31, + 1.139611461e-10, 1.996093359e-17, 5.344349223e-25, 1.511644828e-31, + 5.575349904e-11, 6.083145782e-18, 5.344349223e-25, 1.511644828e-31, + 2.664967552e-11, -8.557475018e-19, -8.595036458e-26, -2.139883875e-32, + 1.209775682e-11, 2.61369883e-18, 5.344349223e-25, 1.511644828e-31, + 4.821800945e-12, 8.789757674e-19, 1.208447639e-25, 3.253064536e-33, + 1.183823005e-12, 1.161414894e-20, 1.29131908e-27, 1.715766248e-34, + 1.183823005e-12, 1.161414894e-20, 1.29131908e-27, 1.715766248e-34, + 2.743283031e-13, 1.161414894e-20, 1.29131908e-27, 1.715766248e-34, +}; diff --git a/src/rename.h b/src/rename.h new file mode 100644 index 00000000..077584b9 --- /dev/null +++ b/src/rename.h @@ -0,0 +1,337 @@ +#ifndef RENAMESCALAR_H + #define RENAMESCALAR_H + + /* ------------------------------------------------------------------------- */ + /* Naming of functions scalar */ + + + + #ifdef DETERMINISTIC + + #define xsin nsimd_sleef_sin_u35d_scalar_f64 +#define xsinf nsimd_sleef_sin_u35d_scalar_f32 +#define xcos nsimd_sleef_cos_u35d_scalar_f64 +#define xcosf nsimd_sleef_cos_u35d_scalar_f32 +#define xsincos nsimd_sleef_sincos_u35d_scalar_f64 +#define xsincosf nsimd_sleef_sincos_u35d_scalar_f32 +#define xtan nsimd_sleef_tan_u35d_scalar_f64 +#define xtanf nsimd_sleef_tan_u35d_scalar_f32 +#define xasin nsimd_sleef_asin_u35d_scalar_f64 +#define xasinf nsimd_sleef_asin_u35d_scalar_f32 +#define xacos nsimd_sleef_acos_u35d_scalar_f64 +#define xacosf nsimd_sleef_acos_u35d_scalar_f32 +#define xatan nsimd_sleef_atan_u35d_scalar_f64 +#define xatanf nsimd_sleef_atan_u35d_scalar_f32 +#define xatan2 nsimd_sleef_atan2_u35d_scalar_f64 +#define xatan2f nsimd_sleef_atan2_u35d_scalar_f32 +#define xlog nsimd_sleef_log_u35d_scalar_f64 +#define xlogf nsimd_sleef_log_u35d_scalar_f32 +#define xcbrt nsimd_sleef_cbrt_u35d_scalar_f64 +#define xcbrtf nsimd_sleef_cbrt_u35d_scalar_f32 +#define xsin_u1 nsimd_sleef_sin_u10d_scalar_f64 +#define xsinf_u1 nsimd_sleef_sin_u10d_scalar_f32 +#define xcos_u1 nsimd_sleef_cos_u10d_scalar_f64 +#define xcosf_u1 nsimd_sleef_cos_u10d_scalar_f32 +#define xsincos_u1 nsimd_sleef_sincos_u10d_scalar_f64 +#define xsincosf_u1 nsimd_sleef_sincos_u10d_scalar_f32 +#define xtan_u1 nsimd_sleef_tan_u10d_scalar_f64 +#define xtanf_u1 nsimd_sleef_tan_u10d_scalar_f32 +#define xasin_u1 nsimd_sleef_asin_u10d_scalar_f64 +#define xasinf_u1 nsimd_sleef_asin_u10d_scalar_f32 +#define xacos_u1 nsimd_sleef_acos_u10d_scalar_f64 +#define xacosf_u1 nsimd_sleef_acos_u10d_scalar_f32 +#define xatan_u1 nsimd_sleef_atan_u10d_scalar_f64 +#define xatanf_u1 nsimd_sleef_atan_u10d_scalar_f32 +#define xatan2_u1 nsimd_sleef_atan2_u10d_scalar_f64 +#define xatan2f_u1 nsimd_sleef_atan2_u10d_scalar_f32 +#define xlog_u1 nsimd_sleef_log_u10d_scalar_f64 +#define xlogf_u1 nsimd_sleef_log_u10d_scalar_f32 +#define xcbrt_u1 nsimd_sleef_cbrt_u10d_scalar_f64 +#define xcbrtf_u1 nsimd_sleef_cbrt_u10d_scalar_f32 
+#define xexp nsimd_sleef_exp_u10d_scalar_f64 +#define xexpf nsimd_sleef_exp_u10d_scalar_f32 +#define xpow nsimd_sleef_pow_u10d_scalar_f64 +#define xpowf nsimd_sleef_pow_u10d_scalar_f32 +#define xsinh nsimd_sleef_sinh_u10d_scalar_f64 +#define xsinhf nsimd_sleef_sinh_u10d_scalar_f32 +#define xcosh nsimd_sleef_cosh_u10d_scalar_f64 +#define xcoshf nsimd_sleef_cosh_u10d_scalar_f32 +#define xtanh nsimd_sleef_tanh_u10d_scalar_f64 +#define xtanhf nsimd_sleef_tanh_u10d_scalar_f32 +#define xsinh_u35 nsimd_sleef_sinh_u35d_scalar_f64 +#define xsinhf_u35 nsimd_sleef_sinh_u35d_scalar_f32 +#define xcosh_u35 nsimd_sleef_cosh_u35d_scalar_f64 +#define xcoshf_u35 nsimd_sleef_cosh_u35d_scalar_f32 +#define xtanh_u35 nsimd_sleef_tanh_u35d_scalar_f64 +#define xtanhf_u35 nsimd_sleef_tanh_u35d_scalar_f32 +#define xfastsin_u3500 nsimd_sleef_fastsin_u3500d_scalar_f64 +#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500d_scalar_f32 +#define xfastcos_u3500 nsimd_sleef_fastcos_u3500d_scalar_f64 +#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500d_scalar_f32 +#define xfastpow_u3500 nsimd_sleef_fastpow_u3500d_scalar_f64 +#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500d_scalar_f32 +#define xasinh nsimd_sleef_asinh_u10d_scalar_f64 +#define xasinhf nsimd_sleef_asinh_u10d_scalar_f32 +#define xacosh nsimd_sleef_acosh_u10d_scalar_f64 +#define xacoshf nsimd_sleef_acosh_u10d_scalar_f32 +#define xatanh nsimd_sleef_atanh_u10d_scalar_f64 +#define xatanhf nsimd_sleef_atanh_u10d_scalar_f32 +#define xexp2 nsimd_sleef_exp2_u10d_scalar_f64 +#define xexp2f nsimd_sleef_exp2_u10d_scalar_f32 +#define xexp2_u35 nsimd_sleef_exp2_u35d_scalar_f64 +#define xexp2f_u35 nsimd_sleef_exp2_u35d_scalar_f32 +#define xexp10 nsimd_sleef_exp10_u10d_scalar_f64 +#define xexp10f nsimd_sleef_exp10_u10d_scalar_f32 +#define xexp10_u35 nsimd_sleef_exp10_u35d_scalar_f64 +#define xexp10f_u35 nsimd_sleef_exp10_u35d_scalar_f32 +#define xexpm1 nsimd_sleef_expm1_u10d_scalar_f64 +#define xexpm1f nsimd_sleef_expm1_u10d_scalar_f32 +#define xlog10 nsimd_sleef_log10_u10d_scalar_f64 +#define xlog10f nsimd_sleef_log10_u10d_scalar_f32 +#define xlog2 nsimd_sleef_log2_u10d_scalar_f64 +#define xlog2f nsimd_sleef_log2_u10d_scalar_f32 +#define xlog2_u35 nsimd_sleef_log2_u35d_scalar_f64 +#define xlog2f_u35 nsimd_sleef_log2_u35d_scalar_f32 +#define xlog1p nsimd_sleef_log1p_u10d_scalar_f64 +#define xlog1pf nsimd_sleef_log1p_u10d_scalar_f32 +#define xsincospi_u05 nsimd_sleef_sincospi_u05d_scalar_f64 +#define xsincospif_u05 nsimd_sleef_sincospi_u05d_scalar_f32 +#define xsincospi_u35 nsimd_sleef_sincospi_u35d_scalar_f64 +#define xsincospif_u35 nsimd_sleef_sincospi_u35d_scalar_f32 +#define xsinpi_u05 nsimd_sleef_sinpi_u05d_scalar_f64 +#define xsinpif_u05 nsimd_sleef_sinpi_u05d_scalar_f32 +#define xcospi_u05 nsimd_sleef_cospi_u05d_scalar_f64 +#define xcospif_u05 nsimd_sleef_cospi_u05d_scalar_f32 +#define xldexp nsimd_sleef_ldexp_scalar_f64 +#define xldexpf nsimd_sleef_ldexp_scalar_f32 +#define xilogb nsimd_sleef_ilogb_scalar_f64 +#define xilogbf nsimd_sleef_ilogb_scalar_f32 +#define xfma nsimd_sleef_fma_scalar_f64 +#define xfmaf nsimd_sleef_fma_scalar_f32 +#define xsqrt nsimd_sleef_sqrt_scalar_f64 +#define xsqrtf nsimd_sleef_sqrt_scalar_f32 +#define xsqrt_u05 nsimd_sleef_sqrt_u05d_scalar_f64 +#define xsqrtf_u05 nsimd_sleef_sqrt_u05d_scalar_f32 +#define xsqrt_u35 nsimd_sleef_sqrt_u35d_scalar_f64 +#define xsqrtf_u35 nsimd_sleef_sqrt_u35d_scalar_f32 +#define xhypot_u05 nsimd_sleef_hypot_u05d_scalar_f64 +#define xhypotf_u05 nsimd_sleef_hypot_u05d_scalar_f32 +#define xhypot_u35 
nsimd_sleef_hypot_u35d_scalar_f64 +#define xhypotf_u35 nsimd_sleef_hypot_u35d_scalar_f32 +#define xfabs nsimd_sleef_fabs_scalar_f64 +#define xfabsf nsimd_sleef_fabs_scalar_f32 +#define xcopysign nsimd_sleef_copysign_scalar_f64 +#define xcopysignf nsimd_sleef_copysign_scalar_f32 +#define xfmax nsimd_sleef_fmax_scalar_f64 +#define xfmaxf nsimd_sleef_fmax_scalar_f32 +#define xfmin nsimd_sleef_fmin_scalar_f64 +#define xfminf nsimd_sleef_fmin_scalar_f32 +#define xfdim nsimd_sleef_fdim_scalar_f64 +#define xfdimf nsimd_sleef_fdim_scalar_f32 +#define xtrunc nsimd_sleef_trunc_scalar_f64 +#define xtruncf nsimd_sleef_trunc_scalar_f32 +#define xfloor nsimd_sleef_floor_scalar_f64 +#define xfloorf nsimd_sleef_floor_scalar_f32 +#define xceil nsimd_sleef_ceil_scalar_f64 +#define xceilf nsimd_sleef_ceil_scalar_f32 +#define xround nsimd_sleef_round_scalar_f64 +#define xroundf nsimd_sleef_round_scalar_f32 +#define xrint nsimd_sleef_rint_scalar_f64 +#define xrintf nsimd_sleef_rint_scalar_f32 +#define xnextafter nsimd_sleef_nextafter_scalar_f64 +#define xnextafterf nsimd_sleef_nextafter_scalar_f32 +#define xfrfrexp nsimd_sleef_frfrexp_scalar_f64 +#define xfrfrexpf nsimd_sleef_frfrexp_scalar_f32 +#define xexpfrexp nsimd_sleef_expfrexp_scalar_f64 +#define xexpfrexpf nsimd_sleef_expfrexp_scalar_f32 +#define xfmod nsimd_sleef_fmod_scalar_f64 +#define xfmodf nsimd_sleef_fmod_scalar_f32 +#define xremainder nsimd_sleef_remainder_scalar_f64 +#define xremainderf nsimd_sleef_remainder_scalar_f32 +#define xmodf nsimd_sleef_modf_scalar_f64 +#define xmodff nsimd_sleef_modf_scalar_f32 +#define xlgamma_u1 nsimd_sleef_lgamma_u10d_scalar_f64 +#define xlgammaf_u1 nsimd_sleef_lgamma_u10d_scalar_f32 +#define xtgamma_u1 nsimd_sleef_tgamma_u10d_scalar_f64 +#define xtgammaf_u1 nsimd_sleef_tgamma_u10d_scalar_f32 +#define xerf_u1 nsimd_sleef_erf_u10d_scalar_f64 +#define xerff_u1 nsimd_sleef_erf_u10d_scalar_f32 +#define xerfc_u15 nsimd_sleef_erfc_u15d_scalar_f64 +#define xerfcf_u15 nsimd_sleef_erfc_u15d_scalar_f32 +#define xgetInt nsimd_sleef_getInt_scalar_f64 +#define xgetIntf nsimd_sleef_getInt_scalar_f32 +#define xgetPtr nsimd_sleef_getPtr_scalar_f64 +#define xgetPtrf nsimd_sleef_getPtr_scalar_f32 + + #else + + #define xsin nsimd_sleef_sin_u35_scalar_f64 +#define xsinf nsimd_sleef_sin_u35_scalar_f32 +#define xcos nsimd_sleef_cos_u35_scalar_f64 +#define xcosf nsimd_sleef_cos_u35_scalar_f32 +#define xsincos nsimd_sleef_sincos_u35_scalar_f64 +#define xsincosf nsimd_sleef_sincos_u35_scalar_f32 +#define xtan nsimd_sleef_tan_u35_scalar_f64 +#define xtanf nsimd_sleef_tan_u35_scalar_f32 +#define xasin nsimd_sleef_asin_u35_scalar_f64 +#define xasinf nsimd_sleef_asin_u35_scalar_f32 +#define xacos nsimd_sleef_acos_u35_scalar_f64 +#define xacosf nsimd_sleef_acos_u35_scalar_f32 +#define xatan nsimd_sleef_atan_u35_scalar_f64 +#define xatanf nsimd_sleef_atan_u35_scalar_f32 +#define xatan2 nsimd_sleef_atan2_u35_scalar_f64 +#define xatan2f nsimd_sleef_atan2_u35_scalar_f32 +#define xlog nsimd_sleef_log_u35_scalar_f64 +#define xlogf nsimd_sleef_log_u35_scalar_f32 +#define xcbrt nsimd_sleef_cbrt_u35_scalar_f64 +#define xcbrtf nsimd_sleef_cbrt_u35_scalar_f32 +#define xsin_u1 nsimd_sleef_sin_u10_scalar_f64 +#define xsinf_u1 nsimd_sleef_sin_u10_scalar_f32 +#define xcos_u1 nsimd_sleef_cos_u10_scalar_f64 +#define xcosf_u1 nsimd_sleef_cos_u10_scalar_f32 +#define xsincos_u1 nsimd_sleef_sincos_u10_scalar_f64 +#define xsincosf_u1 nsimd_sleef_sincos_u10_scalar_f32 +#define xtan_u1 nsimd_sleef_tan_u10_scalar_f64 +#define xtanf_u1 nsimd_sleef_tan_u10_scalar_f32 
+#define xasin_u1 nsimd_sleef_asin_u10_scalar_f64 +#define xasinf_u1 nsimd_sleef_asin_u10_scalar_f32 +#define xacos_u1 nsimd_sleef_acos_u10_scalar_f64 +#define xacosf_u1 nsimd_sleef_acos_u10_scalar_f32 +#define xatan_u1 nsimd_sleef_atan_u10_scalar_f64 +#define xatanf_u1 nsimd_sleef_atan_u10_scalar_f32 +#define xatan2_u1 nsimd_sleef_atan2_u10_scalar_f64 +#define xatan2f_u1 nsimd_sleef_atan2_u10_scalar_f32 +#define xlog_u1 nsimd_sleef_log_u10_scalar_f64 +#define xlogf_u1 nsimd_sleef_log_u10_scalar_f32 +#define xcbrt_u1 nsimd_sleef_cbrt_u10_scalar_f64 +#define xcbrtf_u1 nsimd_sleef_cbrt_u10_scalar_f32 +#define xexp nsimd_sleef_exp_u10_scalar_f64 +#define xexpf nsimd_sleef_exp_u10_scalar_f32 +#define xpow nsimd_sleef_pow_u10_scalar_f64 +#define xpowf nsimd_sleef_pow_u10_scalar_f32 +#define xsinh nsimd_sleef_sinh_u10_scalar_f64 +#define xsinhf nsimd_sleef_sinh_u10_scalar_f32 +#define xcosh nsimd_sleef_cosh_u10_scalar_f64 +#define xcoshf nsimd_sleef_cosh_u10_scalar_f32 +#define xtanh nsimd_sleef_tanh_u10_scalar_f64 +#define xtanhf nsimd_sleef_tanh_u10_scalar_f32 +#define xsinh_u35 nsimd_sleef_sinh_u35_scalar_f64 +#define xsinhf_u35 nsimd_sleef_sinh_u35_scalar_f32 +#define xcosh_u35 nsimd_sleef_cosh_u35_scalar_f64 +#define xcoshf_u35 nsimd_sleef_cosh_u35_scalar_f32 +#define xtanh_u35 nsimd_sleef_tanh_u35_scalar_f64 +#define xtanhf_u35 nsimd_sleef_tanh_u35_scalar_f32 +#define xfastsin_u3500 nsimd_sleef_fastsin_u3500_scalar_f64 +#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500_scalar_f32 +#define xfastcos_u3500 nsimd_sleef_fastcos_u3500_scalar_f64 +#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500_scalar_f32 +#define xfastpow_u3500 nsimd_sleef_fastpow_u3500_scalar_f64 +#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500_scalar_f32 +#define xasinh nsimd_sleef_asinh_u10_scalar_f64 +#define xasinhf nsimd_sleef_asinh_u10_scalar_f32 +#define xacosh nsimd_sleef_acosh_u10_scalar_f64 +#define xacoshf nsimd_sleef_acosh_u10_scalar_f32 +#define xatanh nsimd_sleef_atanh_u10_scalar_f64 +#define xatanhf nsimd_sleef_atanh_u10_scalar_f32 +#define xexp2 nsimd_sleef_exp2_u10_scalar_f64 +#define xexp2f nsimd_sleef_exp2_u10_scalar_f32 +#define xexp2_u35 nsimd_sleef_exp2_u35_scalar_f64 +#define xexp2f_u35 nsimd_sleef_exp2_u35_scalar_f32 +#define xexp10 nsimd_sleef_exp10_u10_scalar_f64 +#define xexp10f nsimd_sleef_exp10_u10_scalar_f32 +#define xexp10_u35 nsimd_sleef_exp10_u35_scalar_f64 +#define xexp10f_u35 nsimd_sleef_exp10_u35_scalar_f32 +#define xexpm1 nsimd_sleef_expm1_u10_scalar_f64 +#define xexpm1f nsimd_sleef_expm1_u10_scalar_f32 +#define xlog10 nsimd_sleef_log10_u10_scalar_f64 +#define xlog10f nsimd_sleef_log10_u10_scalar_f32 +#define xlog2 nsimd_sleef_log2_u10_scalar_f64 +#define xlog2f nsimd_sleef_log2_u10_scalar_f32 +#define xlog2_u35 nsimd_sleef_log2_u35_scalar_f64 +#define xlog2f_u35 nsimd_sleef_log2_u35_scalar_f32 +#define xlog1p nsimd_sleef_log1p_u10_scalar_f64 +#define xlog1pf nsimd_sleef_log1p_u10_scalar_f32 +#define xsincospi_u05 nsimd_sleef_sincospi_u05_scalar_f64 +#define xsincospif_u05 nsimd_sleef_sincospi_u05_scalar_f32 +#define xsincospi_u35 nsimd_sleef_sincospi_u35_scalar_f64 +#define xsincospif_u35 nsimd_sleef_sincospi_u35_scalar_f32 +#define xsinpi_u05 nsimd_sleef_sinpi_u05_scalar_f64 +#define xsinpif_u05 nsimd_sleef_sinpi_u05_scalar_f32 +#define xcospi_u05 nsimd_sleef_cospi_u05_scalar_f64 +#define xcospif_u05 nsimd_sleef_cospi_u05_scalar_f32 +#define xldexp nsimd_sleef_ldexp_scalar_f64 +#define xldexpf nsimd_sleef_ldexp_scalar_f32 +#define xilogb nsimd_sleef_ilogb_scalar_f64 +#define xilogbf 
nsimd_sleef_ilogb_scalar_f32
+#define xfma nsimd_sleef_fma_scalar_f64
+#define xfmaf nsimd_sleef_fma_scalar_f32
+#define xsqrt nsimd_sleef_sqrt_scalar_f64
+#define xsqrtf nsimd_sleef_sqrt_scalar_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05_scalar_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05_scalar_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35_scalar_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35_scalar_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05_scalar_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05_scalar_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35_scalar_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35_scalar_f32
+#define xfabs nsimd_sleef_fabs_scalar_f64
+#define xfabsf nsimd_sleef_fabs_scalar_f32
+#define xcopysign nsimd_sleef_copysign_scalar_f64
+#define xcopysignf nsimd_sleef_copysign_scalar_f32
+#define xfmax nsimd_sleef_fmax_scalar_f64
+#define xfmaxf nsimd_sleef_fmax_scalar_f32
+#define xfmin nsimd_sleef_fmin_scalar_f64
+#define xfminf nsimd_sleef_fmin_scalar_f32
+#define xfdim nsimd_sleef_fdim_scalar_f64
+#define xfdimf nsimd_sleef_fdim_scalar_f32
+#define xtrunc nsimd_sleef_trunc_scalar_f64
+#define xtruncf nsimd_sleef_trunc_scalar_f32
+#define xfloor nsimd_sleef_floor_scalar_f64
+#define xfloorf nsimd_sleef_floor_scalar_f32
+#define xceil nsimd_sleef_ceil_scalar_f64
+#define xceilf nsimd_sleef_ceil_scalar_f32
+#define xround nsimd_sleef_round_scalar_f64
+#define xroundf nsimd_sleef_round_scalar_f32
+#define xrint nsimd_sleef_rint_scalar_f64
+#define xrintf nsimd_sleef_rint_scalar_f32
+#define xnextafter nsimd_sleef_nextafter_scalar_f64
+#define xnextafterf nsimd_sleef_nextafter_scalar_f32
+#define xfrfrexp nsimd_sleef_frfrexp_scalar_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_scalar_f32
+#define xexpfrexp nsimd_sleef_expfrexp_scalar_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_scalar_f32
+#define xfmod nsimd_sleef_fmod_scalar_f64
+#define xfmodf nsimd_sleef_fmod_scalar_f32
+#define xremainder nsimd_sleef_remainder_scalar_f64
+#define xremainderf nsimd_sleef_remainder_scalar_f32
+#define xmodf nsimd_sleef_modf_scalar_f64
+#define xmodff nsimd_sleef_modf_scalar_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10_scalar_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10_scalar_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10_scalar_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10_scalar_f32
+#define xerf_u1 nsimd_sleef_erf_u10_scalar_f64
+#define xerff_u1 nsimd_sleef_erf_u10_scalar_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15_scalar_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15_scalar_f32
+#define xgetInt nsimd_sleef_getInt_scalar_f64
+#define xgetIntf nsimd_sleef_getInt_scalar_f32
+#define xgetPtr nsimd_sleef_getPtr_scalar_f64
+#define xgetPtrf nsimd_sleef_getPtr_scalar_f32
+
+ #endif
+
+ #define rempi nsimd_sleef_rempi_scalar
+ #define rempif nsimd_sleef_rempif_scalar
+ #define rempisub nsimd_sleef_rempisub_scalar
+ #define rempisubf nsimd_sleef_rempisubf_scalar
+ #define gammak nsimd_gammak_scalar
+ #define gammafk nsimd_gammafk_scalar
+
+
+
+
+
+#endif
+
diff --git a/src/renameadvsimd.h b/src/renameadvsimd.h
new file mode 100644
index 00000000..0e752008
--- /dev/null
+++ b/src/renameadvsimd.h
@@ -0,0 +1,337 @@
+#ifndef RENAMEADVSIMD_H
+ #define RENAMEADVSIMD_H
+
+ /* ------------------------------------------------------------------------- */
+ /* Naming of functions aarch64 */
+
+ #ifdef NSIMD_AARCH64
+
+ #ifdef DETERMINISTIC
+
+ #define xsin nsimd_sleef_sin_u35d_aarch64_f64
+#define xsinf nsimd_sleef_sin_u35d_aarch64_f32
+#define xcos nsimd_sleef_cos_u35d_aarch64_f64
+#define xcosf nsimd_sleef_cos_u35d_aarch64_f32
+#define xsincos nsimd_sleef_sincos_u35d_aarch64_f64
+#define xsincosf nsimd_sleef_sincos_u35d_aarch64_f32
+#define xtan nsimd_sleef_tan_u35d_aarch64_f64
+#define xtanf nsimd_sleef_tan_u35d_aarch64_f32
+#define xasin nsimd_sleef_asin_u35d_aarch64_f64
+#define xasinf nsimd_sleef_asin_u35d_aarch64_f32
+#define xacos nsimd_sleef_acos_u35d_aarch64_f64
+#define xacosf nsimd_sleef_acos_u35d_aarch64_f32
+#define xatan nsimd_sleef_atan_u35d_aarch64_f64
+#define xatanf nsimd_sleef_atan_u35d_aarch64_f32
+#define xatan2 nsimd_sleef_atan2_u35d_aarch64_f64
+#define xatan2f nsimd_sleef_atan2_u35d_aarch64_f32
+#define xlog nsimd_sleef_log_u35d_aarch64_f64
+#define xlogf nsimd_sleef_log_u35d_aarch64_f32
+#define xcbrt nsimd_sleef_cbrt_u35d_aarch64_f64
+#define xcbrtf nsimd_sleef_cbrt_u35d_aarch64_f32
+#define xsin_u1 nsimd_sleef_sin_u10d_aarch64_f64
+#define xsinf_u1 nsimd_sleef_sin_u10d_aarch64_f32
+#define xcos_u1 nsimd_sleef_cos_u10d_aarch64_f64
+#define xcosf_u1 nsimd_sleef_cos_u10d_aarch64_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10d_aarch64_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10d_aarch64_f32
+#define xtan_u1 nsimd_sleef_tan_u10d_aarch64_f64
+#define xtanf_u1 nsimd_sleef_tan_u10d_aarch64_f32
+#define xasin_u1 nsimd_sleef_asin_u10d_aarch64_f64
+#define xasinf_u1 nsimd_sleef_asin_u10d_aarch64_f32
+#define xacos_u1 nsimd_sleef_acos_u10d_aarch64_f64
+#define xacosf_u1 nsimd_sleef_acos_u10d_aarch64_f32
+#define xatan_u1 nsimd_sleef_atan_u10d_aarch64_f64
+#define xatanf_u1 nsimd_sleef_atan_u10d_aarch64_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10d_aarch64_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10d_aarch64_f32
+#define xlog_u1 nsimd_sleef_log_u10d_aarch64_f64
+#define xlogf_u1 nsimd_sleef_log_u10d_aarch64_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10d_aarch64_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10d_aarch64_f32
+#define xexp nsimd_sleef_exp_u10d_aarch64_f64
+#define xexpf nsimd_sleef_exp_u10d_aarch64_f32
+#define xpow nsimd_sleef_pow_u10d_aarch64_f64
+#define xpowf nsimd_sleef_pow_u10d_aarch64_f32
+#define xsinh nsimd_sleef_sinh_u10d_aarch64_f64
+#define xsinhf nsimd_sleef_sinh_u10d_aarch64_f32
+#define xcosh nsimd_sleef_cosh_u10d_aarch64_f64
+#define xcoshf nsimd_sleef_cosh_u10d_aarch64_f32
+#define xtanh nsimd_sleef_tanh_u10d_aarch64_f64
+#define xtanhf nsimd_sleef_tanh_u10d_aarch64_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35d_aarch64_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35d_aarch64_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35d_aarch64_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35d_aarch64_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35d_aarch64_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35d_aarch64_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500d_aarch64_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500d_aarch64_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500d_aarch64_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500d_aarch64_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500d_aarch64_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500d_aarch64_f32
+#define xasinh nsimd_sleef_asinh_u10d_aarch64_f64
+#define xasinhf nsimd_sleef_asinh_u10d_aarch64_f32
+#define xacosh nsimd_sleef_acosh_u10d_aarch64_f64
+#define xacoshf nsimd_sleef_acosh_u10d_aarch64_f32
+#define xatanh nsimd_sleef_atanh_u10d_aarch64_f64
+#define xatanhf nsimd_sleef_atanh_u10d_aarch64_f32
+#define xexp2 nsimd_sleef_exp2_u10d_aarch64_f64
+#define xexp2f nsimd_sleef_exp2_u10d_aarch64_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35d_aarch64_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35d_aarch64_f32
+#define xexp10 nsimd_sleef_exp10_u10d_aarch64_f64
+#define xexp10f nsimd_sleef_exp10_u10d_aarch64_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35d_aarch64_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35d_aarch64_f32
+#define xexpm1 nsimd_sleef_expm1_u10d_aarch64_f64
+#define xexpm1f nsimd_sleef_expm1_u10d_aarch64_f32
+#define xlog10 nsimd_sleef_log10_u10d_aarch64_f64
+#define xlog10f nsimd_sleef_log10_u10d_aarch64_f32
+#define xlog2 nsimd_sleef_log2_u10d_aarch64_f64
+#define xlog2f nsimd_sleef_log2_u10d_aarch64_f32
+#define xlog2_u35 nsimd_sleef_log2_u35d_aarch64_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35d_aarch64_f32
+#define xlog1p nsimd_sleef_log1p_u10d_aarch64_f64
+#define xlog1pf nsimd_sleef_log1p_u10d_aarch64_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05d_aarch64_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05d_aarch64_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35d_aarch64_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35d_aarch64_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05d_aarch64_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05d_aarch64_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05d_aarch64_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05d_aarch64_f32
+#define xldexp nsimd_sleef_ldexp_aarch64_f64
+#define xldexpf nsimd_sleef_ldexp_aarch64_f32
+#define xilogb nsimd_sleef_ilogb_aarch64_f64
+#define xilogbf nsimd_sleef_ilogb_aarch64_f32
+#define xfma nsimd_sleef_fma_aarch64_f64
+#define xfmaf nsimd_sleef_fma_aarch64_f32
+#define xsqrt nsimd_sleef_sqrt_aarch64_f64
+#define xsqrtf nsimd_sleef_sqrt_aarch64_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05d_aarch64_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05d_aarch64_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35d_aarch64_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35d_aarch64_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05d_aarch64_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05d_aarch64_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35d_aarch64_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35d_aarch64_f32
+#define xfabs nsimd_sleef_fabs_aarch64_f64
+#define xfabsf nsimd_sleef_fabs_aarch64_f32
+#define xcopysign nsimd_sleef_copysign_aarch64_f64
+#define xcopysignf nsimd_sleef_copysign_aarch64_f32
+#define xfmax nsimd_sleef_fmax_aarch64_f64
+#define xfmaxf nsimd_sleef_fmax_aarch64_f32
+#define xfmin nsimd_sleef_fmin_aarch64_f64
+#define xfminf nsimd_sleef_fmin_aarch64_f32
+#define xfdim nsimd_sleef_fdim_aarch64_f64
+#define xfdimf nsimd_sleef_fdim_aarch64_f32
+#define xtrunc nsimd_sleef_trunc_aarch64_f64
+#define xtruncf nsimd_sleef_trunc_aarch64_f32
+#define xfloor nsimd_sleef_floor_aarch64_f64
+#define xfloorf nsimd_sleef_floor_aarch64_f32
+#define xceil nsimd_sleef_ceil_aarch64_f64
+#define xceilf nsimd_sleef_ceil_aarch64_f32
+#define xround nsimd_sleef_round_aarch64_f64
+#define xroundf nsimd_sleef_round_aarch64_f32
+#define xrint nsimd_sleef_rint_aarch64_f64
+#define xrintf nsimd_sleef_rint_aarch64_f32
+#define xnextafter nsimd_sleef_nextafter_aarch64_f64
+#define xnextafterf nsimd_sleef_nextafter_aarch64_f32
+#define xfrfrexp nsimd_sleef_frfrexp_aarch64_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_aarch64_f32
+#define xexpfrexp nsimd_sleef_expfrexp_aarch64_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_aarch64_f32
+#define xfmod nsimd_sleef_fmod_aarch64_f64
+#define xfmodf nsimd_sleef_fmod_aarch64_f32
+#define xremainder nsimd_sleef_remainder_aarch64_f64
+#define xremainderf nsimd_sleef_remainder_aarch64_f32
+#define xmodf nsimd_sleef_modf_aarch64_f64
+#define xmodff nsimd_sleef_modf_aarch64_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10d_aarch64_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10d_aarch64_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10d_aarch64_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10d_aarch64_f32
+#define xerf_u1 nsimd_sleef_erf_u10d_aarch64_f64
+#define xerff_u1 nsimd_sleef_erf_u10d_aarch64_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15d_aarch64_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15d_aarch64_f32
+#define xgetInt nsimd_sleef_getInt_aarch64_f64
+#define xgetIntf nsimd_sleef_getInt_aarch64_f32
+#define xgetPtr nsimd_sleef_getPtr_aarch64_f64
+#define xgetPtrf nsimd_sleef_getPtr_aarch64_f32
+
+ #else
+
+ #define xsin nsimd_sleef_sin_u35_aarch64_f64
+#define xsinf nsimd_sleef_sin_u35_aarch64_f32
+#define xcos nsimd_sleef_cos_u35_aarch64_f64
+#define xcosf nsimd_sleef_cos_u35_aarch64_f32
+#define xsincos nsimd_sleef_sincos_u35_aarch64_f64
+#define xsincosf nsimd_sleef_sincos_u35_aarch64_f32
+#define xtan nsimd_sleef_tan_u35_aarch64_f64
+#define xtanf nsimd_sleef_tan_u35_aarch64_f32
+#define xasin nsimd_sleef_asin_u35_aarch64_f64
+#define xasinf nsimd_sleef_asin_u35_aarch64_f32
+#define xacos nsimd_sleef_acos_u35_aarch64_f64
+#define xacosf nsimd_sleef_acos_u35_aarch64_f32
+#define xatan nsimd_sleef_atan_u35_aarch64_f64
+#define xatanf nsimd_sleef_atan_u35_aarch64_f32
+#define xatan2 nsimd_sleef_atan2_u35_aarch64_f64
+#define xatan2f nsimd_sleef_atan2_u35_aarch64_f32
+#define xlog nsimd_sleef_log_u35_aarch64_f64
+#define xlogf nsimd_sleef_log_u35_aarch64_f32
+#define xcbrt nsimd_sleef_cbrt_u35_aarch64_f64
+#define xcbrtf nsimd_sleef_cbrt_u35_aarch64_f32
+#define xsin_u1 nsimd_sleef_sin_u10_aarch64_f64
+#define xsinf_u1 nsimd_sleef_sin_u10_aarch64_f32
+#define xcos_u1 nsimd_sleef_cos_u10_aarch64_f64
+#define xcosf_u1 nsimd_sleef_cos_u10_aarch64_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10_aarch64_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10_aarch64_f32
+#define xtan_u1 nsimd_sleef_tan_u10_aarch64_f64
+#define xtanf_u1 nsimd_sleef_tan_u10_aarch64_f32
+#define xasin_u1 nsimd_sleef_asin_u10_aarch64_f64
+#define xasinf_u1 nsimd_sleef_asin_u10_aarch64_f32
+#define xacos_u1 nsimd_sleef_acos_u10_aarch64_f64
+#define xacosf_u1 nsimd_sleef_acos_u10_aarch64_f32
+#define xatan_u1 nsimd_sleef_atan_u10_aarch64_f64
+#define xatanf_u1 nsimd_sleef_atan_u10_aarch64_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10_aarch64_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10_aarch64_f32
+#define xlog_u1 nsimd_sleef_log_u10_aarch64_f64
+#define xlogf_u1 nsimd_sleef_log_u10_aarch64_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10_aarch64_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10_aarch64_f32
+#define xexp nsimd_sleef_exp_u10_aarch64_f64
+#define xexpf nsimd_sleef_exp_u10_aarch64_f32
+#define xpow nsimd_sleef_pow_u10_aarch64_f64
+#define xpowf nsimd_sleef_pow_u10_aarch64_f32
+#define xsinh nsimd_sleef_sinh_u10_aarch64_f64
+#define xsinhf nsimd_sleef_sinh_u10_aarch64_f32
+#define xcosh nsimd_sleef_cosh_u10_aarch64_f64
+#define xcoshf nsimd_sleef_cosh_u10_aarch64_f32
+#define xtanh nsimd_sleef_tanh_u10_aarch64_f64
+#define xtanhf nsimd_sleef_tanh_u10_aarch64_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35_aarch64_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35_aarch64_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35_aarch64_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35_aarch64_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35_aarch64_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35_aarch64_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500_aarch64_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500_aarch64_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500_aarch64_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500_aarch64_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500_aarch64_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500_aarch64_f32
+#define xasinh nsimd_sleef_asinh_u10_aarch64_f64
+#define xasinhf nsimd_sleef_asinh_u10_aarch64_f32
+#define xacosh nsimd_sleef_acosh_u10_aarch64_f64
+#define xacoshf nsimd_sleef_acosh_u10_aarch64_f32
+#define xatanh nsimd_sleef_atanh_u10_aarch64_f64
+#define xatanhf nsimd_sleef_atanh_u10_aarch64_f32
+#define xexp2 nsimd_sleef_exp2_u10_aarch64_f64
+#define xexp2f nsimd_sleef_exp2_u10_aarch64_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35_aarch64_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35_aarch64_f32
+#define xexp10 nsimd_sleef_exp10_u10_aarch64_f64
+#define xexp10f nsimd_sleef_exp10_u10_aarch64_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35_aarch64_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35_aarch64_f32
+#define xexpm1 nsimd_sleef_expm1_u10_aarch64_f64
+#define xexpm1f nsimd_sleef_expm1_u10_aarch64_f32
+#define xlog10 nsimd_sleef_log10_u10_aarch64_f64
+#define xlog10f nsimd_sleef_log10_u10_aarch64_f32
+#define xlog2 nsimd_sleef_log2_u10_aarch64_f64
+#define xlog2f nsimd_sleef_log2_u10_aarch64_f32
+#define xlog2_u35 nsimd_sleef_log2_u35_aarch64_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35_aarch64_f32
+#define xlog1p nsimd_sleef_log1p_u10_aarch64_f64
+#define xlog1pf nsimd_sleef_log1p_u10_aarch64_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05_aarch64_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05_aarch64_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35_aarch64_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35_aarch64_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05_aarch64_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05_aarch64_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05_aarch64_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05_aarch64_f32
+#define xldexp nsimd_sleef_ldexp_aarch64_f64
+#define xldexpf nsimd_sleef_ldexp_aarch64_f32
+#define xilogb nsimd_sleef_ilogb_aarch64_f64
+#define xilogbf nsimd_sleef_ilogb_aarch64_f32
+#define xfma nsimd_sleef_fma_aarch64_f64
+#define xfmaf nsimd_sleef_fma_aarch64_f32
+#define xsqrt nsimd_sleef_sqrt_aarch64_f64
+#define xsqrtf nsimd_sleef_sqrt_aarch64_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05_aarch64_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05_aarch64_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35_aarch64_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35_aarch64_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05_aarch64_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05_aarch64_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35_aarch64_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35_aarch64_f32
+#define xfabs nsimd_sleef_fabs_aarch64_f64
+#define xfabsf nsimd_sleef_fabs_aarch64_f32
+#define xcopysign nsimd_sleef_copysign_aarch64_f64
+#define xcopysignf nsimd_sleef_copysign_aarch64_f32
+#define xfmax nsimd_sleef_fmax_aarch64_f64
+#define xfmaxf nsimd_sleef_fmax_aarch64_f32
+#define xfmin nsimd_sleef_fmin_aarch64_f64
+#define xfminf nsimd_sleef_fmin_aarch64_f32
+#define xfdim nsimd_sleef_fdim_aarch64_f64
+#define xfdimf nsimd_sleef_fdim_aarch64_f32
+#define xtrunc nsimd_sleef_trunc_aarch64_f64
+#define xtruncf nsimd_sleef_trunc_aarch64_f32
+#define xfloor nsimd_sleef_floor_aarch64_f64
+#define xfloorf nsimd_sleef_floor_aarch64_f32
+#define xceil nsimd_sleef_ceil_aarch64_f64
+#define xceilf nsimd_sleef_ceil_aarch64_f32
+#define xround nsimd_sleef_round_aarch64_f64
+#define xroundf nsimd_sleef_round_aarch64_f32
+#define xrint nsimd_sleef_rint_aarch64_f64
+#define xrintf nsimd_sleef_rint_aarch64_f32
+#define xnextafter nsimd_sleef_nextafter_aarch64_f64
+#define xnextafterf nsimd_sleef_nextafter_aarch64_f32
+#define xfrfrexp nsimd_sleef_frfrexp_aarch64_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_aarch64_f32
+#define xexpfrexp nsimd_sleef_expfrexp_aarch64_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_aarch64_f32
+#define xfmod nsimd_sleef_fmod_aarch64_f64
+#define xfmodf nsimd_sleef_fmod_aarch64_f32
+#define xremainder nsimd_sleef_remainder_aarch64_f64
+#define xremainderf nsimd_sleef_remainder_aarch64_f32
+#define xmodf nsimd_sleef_modf_aarch64_f64
+#define xmodff nsimd_sleef_modf_aarch64_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10_aarch64_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10_aarch64_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10_aarch64_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10_aarch64_f32
+#define xerf_u1 nsimd_sleef_erf_u10_aarch64_f64
+#define xerff_u1 nsimd_sleef_erf_u10_aarch64_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15_aarch64_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15_aarch64_f32
+#define xgetInt nsimd_sleef_getInt_aarch64_f64
+#define xgetIntf nsimd_sleef_getInt_aarch64_f32
+#define xgetPtr nsimd_sleef_getPtr_aarch64_f64
+#define xgetPtrf nsimd_sleef_getPtr_aarch64_f32
+
+ #endif
+
+ #define rempi nsimd_sleef_rempi_aarch64
+ #define rempif nsimd_sleef_rempif_aarch64
+ #define rempisub nsimd_sleef_rempisub_aarch64
+ #define rempisubf nsimd_sleef_rempisubf_aarch64
+ #define gammak nsimd_gammak_aarch64
+ #define gammafk nsimd_gammafk_aarch64
+
+ #endif
+
+
+
+#endif
+
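Each generated rename header maps SLEEF's internal x-prefixed kernel names (xsin, xsinf, ...) onto architecture-qualified nsimd_sleef_* symbols, so the same sleefsimddp.c/sleefsimdsp.c sources can be compiled once per SIMD extension without clashing at link time. A minimal sketch of how a caller reaches the renamed aarch64 kernel; the prototype below is an assumption for illustration only (the real declarations live in the generated NSIMD sources):

#include <arm_neon.h>

/* Assumed prototype: when SLEEF is built with -DNSIMD_AARCH64 -DDORENAME=1,
   renameadvsimd.h renames xsin to this symbol (3.5-ULP vector sine). */
float64x2_t nsimd_sleef_sin_u35_aarch64_f64(float64x2_t x);

float64x2_t sin_lanes(float64x2_t x) {
  /* Two f64 lanes computed per call. */
  return nsimd_sleef_sin_u35_aarch64_f64(x);
}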
diff --git a/src/renameavx.h b/src/renameavx.h
new file mode 100644
index 00000000..1793fb92
--- /dev/null
+++ b/src/renameavx.h
@@ -0,0 +1,337 @@
+#ifndef RENAMEAVX_H
+ #define RENAMEAVX_H
+
+ /* ------------------------------------------------------------------------- */
+ /* Naming of functions avx */
+
+ #ifdef NSIMD_AVX
+
+ #ifdef DETERMINISTIC
+
+ #define xsin nsimd_sleef_sin_u35d_avx_f64
+#define xsinf nsimd_sleef_sin_u35d_avx_f32
+#define xcos nsimd_sleef_cos_u35d_avx_f64
+#define xcosf nsimd_sleef_cos_u35d_avx_f32
+#define xsincos nsimd_sleef_sincos_u35d_avx_f64
+#define xsincosf nsimd_sleef_sincos_u35d_avx_f32
+#define xtan nsimd_sleef_tan_u35d_avx_f64
+#define xtanf nsimd_sleef_tan_u35d_avx_f32
+#define xasin nsimd_sleef_asin_u35d_avx_f64
+#define xasinf nsimd_sleef_asin_u35d_avx_f32
+#define xacos nsimd_sleef_acos_u35d_avx_f64
+#define xacosf nsimd_sleef_acos_u35d_avx_f32
+#define xatan nsimd_sleef_atan_u35d_avx_f64
+#define xatanf nsimd_sleef_atan_u35d_avx_f32
+#define xatan2 nsimd_sleef_atan2_u35d_avx_f64
+#define xatan2f nsimd_sleef_atan2_u35d_avx_f32
+#define xlog nsimd_sleef_log_u35d_avx_f64
+#define xlogf nsimd_sleef_log_u35d_avx_f32
+#define xcbrt nsimd_sleef_cbrt_u35d_avx_f64
+#define xcbrtf nsimd_sleef_cbrt_u35d_avx_f32
+#define xsin_u1 nsimd_sleef_sin_u10d_avx_f64
+#define xsinf_u1 nsimd_sleef_sin_u10d_avx_f32
+#define xcos_u1 nsimd_sleef_cos_u10d_avx_f64
+#define xcosf_u1 nsimd_sleef_cos_u10d_avx_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10d_avx_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10d_avx_f32
+#define xtan_u1 nsimd_sleef_tan_u10d_avx_f64
+#define xtanf_u1 nsimd_sleef_tan_u10d_avx_f32
+#define xasin_u1 nsimd_sleef_asin_u10d_avx_f64
+#define xasinf_u1 nsimd_sleef_asin_u10d_avx_f32
+#define xacos_u1 nsimd_sleef_acos_u10d_avx_f64
+#define xacosf_u1 nsimd_sleef_acos_u10d_avx_f32
+#define xatan_u1 nsimd_sleef_atan_u10d_avx_f64
+#define xatanf_u1 nsimd_sleef_atan_u10d_avx_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10d_avx_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10d_avx_f32
+#define xlog_u1 nsimd_sleef_log_u10d_avx_f64
+#define xlogf_u1 nsimd_sleef_log_u10d_avx_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10d_avx_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10d_avx_f32
+#define xexp nsimd_sleef_exp_u10d_avx_f64
+#define xexpf nsimd_sleef_exp_u10d_avx_f32
+#define xpow nsimd_sleef_pow_u10d_avx_f64
+#define xpowf nsimd_sleef_pow_u10d_avx_f32
+#define xsinh nsimd_sleef_sinh_u10d_avx_f64
+#define xsinhf nsimd_sleef_sinh_u10d_avx_f32
+#define xcosh nsimd_sleef_cosh_u10d_avx_f64
+#define xcoshf nsimd_sleef_cosh_u10d_avx_f32
+#define xtanh nsimd_sleef_tanh_u10d_avx_f64
+#define xtanhf nsimd_sleef_tanh_u10d_avx_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35d_avx_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35d_avx_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35d_avx_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35d_avx_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35d_avx_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35d_avx_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500d_avx_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500d_avx_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500d_avx_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500d_avx_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500d_avx_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500d_avx_f32
+#define xasinh nsimd_sleef_asinh_u10d_avx_f64
+#define xasinhf nsimd_sleef_asinh_u10d_avx_f32
+#define xacosh nsimd_sleef_acosh_u10d_avx_f64
+#define xacoshf nsimd_sleef_acosh_u10d_avx_f32
+#define xatanh nsimd_sleef_atanh_u10d_avx_f64
+#define xatanhf nsimd_sleef_atanh_u10d_avx_f32
+#define xexp2 nsimd_sleef_exp2_u10d_avx_f64
+#define xexp2f nsimd_sleef_exp2_u10d_avx_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35d_avx_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35d_avx_f32
+#define xexp10 nsimd_sleef_exp10_u10d_avx_f64
+#define xexp10f nsimd_sleef_exp10_u10d_avx_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35d_avx_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35d_avx_f32
+#define xexpm1 nsimd_sleef_expm1_u10d_avx_f64
+#define xexpm1f nsimd_sleef_expm1_u10d_avx_f32
+#define xlog10 nsimd_sleef_log10_u10d_avx_f64
+#define xlog10f nsimd_sleef_log10_u10d_avx_f32
+#define xlog2 nsimd_sleef_log2_u10d_avx_f64
+#define xlog2f nsimd_sleef_log2_u10d_avx_f32
+#define xlog2_u35 nsimd_sleef_log2_u35d_avx_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35d_avx_f32
+#define xlog1p nsimd_sleef_log1p_u10d_avx_f64
+#define xlog1pf nsimd_sleef_log1p_u10d_avx_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05d_avx_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05d_avx_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35d_avx_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35d_avx_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05d_avx_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05d_avx_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05d_avx_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05d_avx_f32
+#define xldexp nsimd_sleef_ldexp_avx_f64
+#define xldexpf nsimd_sleef_ldexp_avx_f32
+#define xilogb nsimd_sleef_ilogb_avx_f64
+#define xilogbf nsimd_sleef_ilogb_avx_f32
+#define xfma nsimd_sleef_fma_avx_f64
+#define xfmaf nsimd_sleef_fma_avx_f32
+#define xsqrt nsimd_sleef_sqrt_avx_f64
+#define xsqrtf nsimd_sleef_sqrt_avx_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05d_avx_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05d_avx_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35d_avx_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35d_avx_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05d_avx_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05d_avx_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35d_avx_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35d_avx_f32
+#define xfabs nsimd_sleef_fabs_avx_f64
+#define xfabsf nsimd_sleef_fabs_avx_f32
+#define xcopysign nsimd_sleef_copysign_avx_f64
+#define xcopysignf nsimd_sleef_copysign_avx_f32
+#define xfmax nsimd_sleef_fmax_avx_f64
+#define xfmaxf nsimd_sleef_fmax_avx_f32
+#define xfmin nsimd_sleef_fmin_avx_f64
+#define xfminf nsimd_sleef_fmin_avx_f32
+#define xfdim nsimd_sleef_fdim_avx_f64
+#define xfdimf nsimd_sleef_fdim_avx_f32
+#define xtrunc nsimd_sleef_trunc_avx_f64
+#define xtruncf nsimd_sleef_trunc_avx_f32
+#define xfloor nsimd_sleef_floor_avx_f64
+#define xfloorf nsimd_sleef_floor_avx_f32
+#define xceil nsimd_sleef_ceil_avx_f64
+#define xceilf nsimd_sleef_ceil_avx_f32
+#define xround nsimd_sleef_round_avx_f64
+#define xroundf nsimd_sleef_round_avx_f32
+#define xrint nsimd_sleef_rint_avx_f64
+#define xrintf nsimd_sleef_rint_avx_f32
+#define xnextafter nsimd_sleef_nextafter_avx_f64
+#define xnextafterf nsimd_sleef_nextafter_avx_f32
+#define xfrfrexp nsimd_sleef_frfrexp_avx_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_avx_f32
+#define xexpfrexp nsimd_sleef_expfrexp_avx_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_avx_f32
+#define xfmod nsimd_sleef_fmod_avx_f64
+#define xfmodf nsimd_sleef_fmod_avx_f32
+#define xremainder nsimd_sleef_remainder_avx_f64
+#define xremainderf nsimd_sleef_remainder_avx_f32
+#define xmodf nsimd_sleef_modf_avx_f64
+#define xmodff nsimd_sleef_modf_avx_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10d_avx_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10d_avx_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10d_avx_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10d_avx_f32
+#define xerf_u1 nsimd_sleef_erf_u10d_avx_f64
+#define xerff_u1 nsimd_sleef_erf_u10d_avx_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15d_avx_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15d_avx_f32
+#define xgetInt nsimd_sleef_getInt_avx_f64
+#define xgetIntf nsimd_sleef_getInt_avx_f32
+#define xgetPtr nsimd_sleef_getPtr_avx_f64
+#define xgetPtrf nsimd_sleef_getPtr_avx_f32
+
+ #else
+
+ #define xsin nsimd_sleef_sin_u35_avx_f64
+#define xsinf nsimd_sleef_sin_u35_avx_f32
+#define xcos nsimd_sleef_cos_u35_avx_f64
+#define xcosf nsimd_sleef_cos_u35_avx_f32
+#define xsincos nsimd_sleef_sincos_u35_avx_f64
+#define xsincosf nsimd_sleef_sincos_u35_avx_f32
+#define xtan nsimd_sleef_tan_u35_avx_f64
+#define xtanf nsimd_sleef_tan_u35_avx_f32
+#define xasin nsimd_sleef_asin_u35_avx_f64
+#define xasinf nsimd_sleef_asin_u35_avx_f32
+#define xacos nsimd_sleef_acos_u35_avx_f64
+#define xacosf nsimd_sleef_acos_u35_avx_f32
+#define xatan nsimd_sleef_atan_u35_avx_f64
+#define xatanf nsimd_sleef_atan_u35_avx_f32
+#define xatan2 nsimd_sleef_atan2_u35_avx_f64
+#define xatan2f nsimd_sleef_atan2_u35_avx_f32
+#define xlog nsimd_sleef_log_u35_avx_f64
+#define xlogf nsimd_sleef_log_u35_avx_f32
+#define xcbrt nsimd_sleef_cbrt_u35_avx_f64
+#define xcbrtf nsimd_sleef_cbrt_u35_avx_f32
+#define xsin_u1 nsimd_sleef_sin_u10_avx_f64
+#define xsinf_u1 nsimd_sleef_sin_u10_avx_f32
+#define xcos_u1 nsimd_sleef_cos_u10_avx_f64
+#define xcosf_u1 nsimd_sleef_cos_u10_avx_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10_avx_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10_avx_f32
+#define xtan_u1 nsimd_sleef_tan_u10_avx_f64
+#define xtanf_u1 nsimd_sleef_tan_u10_avx_f32
+#define xasin_u1 nsimd_sleef_asin_u10_avx_f64
+#define xasinf_u1 nsimd_sleef_asin_u10_avx_f32
+#define xacos_u1 nsimd_sleef_acos_u10_avx_f64
+#define xacosf_u1 nsimd_sleef_acos_u10_avx_f32
+#define xatan_u1 nsimd_sleef_atan_u10_avx_f64
+#define xatanf_u1 nsimd_sleef_atan_u10_avx_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10_avx_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10_avx_f32
+#define xlog_u1 nsimd_sleef_log_u10_avx_f64
+#define xlogf_u1 nsimd_sleef_log_u10_avx_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10_avx_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10_avx_f32
+#define xexp nsimd_sleef_exp_u10_avx_f64
+#define xexpf nsimd_sleef_exp_u10_avx_f32
+#define xpow nsimd_sleef_pow_u10_avx_f64
+#define xpowf nsimd_sleef_pow_u10_avx_f32
+#define xsinh nsimd_sleef_sinh_u10_avx_f64
+#define xsinhf nsimd_sleef_sinh_u10_avx_f32
+#define xcosh nsimd_sleef_cosh_u10_avx_f64
+#define xcoshf nsimd_sleef_cosh_u10_avx_f32
+#define xtanh nsimd_sleef_tanh_u10_avx_f64
+#define xtanhf nsimd_sleef_tanh_u10_avx_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35_avx_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35_avx_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35_avx_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35_avx_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35_avx_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35_avx_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500_avx_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500_avx_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500_avx_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500_avx_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500_avx_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500_avx_f32
+#define xasinh nsimd_sleef_asinh_u10_avx_f64
+#define xasinhf nsimd_sleef_asinh_u10_avx_f32
+#define xacosh nsimd_sleef_acosh_u10_avx_f64
+#define xacoshf nsimd_sleef_acosh_u10_avx_f32
+#define xatanh nsimd_sleef_atanh_u10_avx_f64
+#define xatanhf nsimd_sleef_atanh_u10_avx_f32
+#define xexp2 nsimd_sleef_exp2_u10_avx_f64
+#define xexp2f nsimd_sleef_exp2_u10_avx_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35_avx_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35_avx_f32
+#define xexp10 nsimd_sleef_exp10_u10_avx_f64
+#define xexp10f nsimd_sleef_exp10_u10_avx_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35_avx_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35_avx_f32
+#define xexpm1 nsimd_sleef_expm1_u10_avx_f64
+#define xexpm1f nsimd_sleef_expm1_u10_avx_f32
+#define xlog10 nsimd_sleef_log10_u10_avx_f64
+#define xlog10f nsimd_sleef_log10_u10_avx_f32
+#define xlog2 nsimd_sleef_log2_u10_avx_f64
+#define xlog2f nsimd_sleef_log2_u10_avx_f32
+#define xlog2_u35 nsimd_sleef_log2_u35_avx_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35_avx_f32
+#define xlog1p nsimd_sleef_log1p_u10_avx_f64
+#define xlog1pf nsimd_sleef_log1p_u10_avx_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05_avx_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05_avx_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35_avx_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35_avx_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05_avx_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05_avx_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05_avx_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05_avx_f32
+#define xldexp nsimd_sleef_ldexp_avx_f64
+#define xldexpf nsimd_sleef_ldexp_avx_f32
+#define xilogb nsimd_sleef_ilogb_avx_f64
+#define xilogbf nsimd_sleef_ilogb_avx_f32
+#define xfma nsimd_sleef_fma_avx_f64
+#define xfmaf nsimd_sleef_fma_avx_f32
+#define xsqrt nsimd_sleef_sqrt_avx_f64
+#define xsqrtf nsimd_sleef_sqrt_avx_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05_avx_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05_avx_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35_avx_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35_avx_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05_avx_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05_avx_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35_avx_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35_avx_f32
+#define xfabs nsimd_sleef_fabs_avx_f64
+#define xfabsf nsimd_sleef_fabs_avx_f32
+#define xcopysign nsimd_sleef_copysign_avx_f64
+#define xcopysignf nsimd_sleef_copysign_avx_f32
+#define xfmax nsimd_sleef_fmax_avx_f64
+#define xfmaxf nsimd_sleef_fmax_avx_f32
+#define xfmin nsimd_sleef_fmin_avx_f64
+#define xfminf nsimd_sleef_fmin_avx_f32
+#define xfdim nsimd_sleef_fdim_avx_f64
+#define xfdimf nsimd_sleef_fdim_avx_f32
+#define xtrunc nsimd_sleef_trunc_avx_f64
+#define xtruncf nsimd_sleef_trunc_avx_f32
+#define xfloor nsimd_sleef_floor_avx_f64
+#define xfloorf nsimd_sleef_floor_avx_f32
+#define xceil nsimd_sleef_ceil_avx_f64
+#define xceilf nsimd_sleef_ceil_avx_f32
+#define xround nsimd_sleef_round_avx_f64
+#define xroundf nsimd_sleef_round_avx_f32
+#define xrint nsimd_sleef_rint_avx_f64
+#define xrintf nsimd_sleef_rint_avx_f32
+#define xnextafter nsimd_sleef_nextafter_avx_f64
+#define xnextafterf nsimd_sleef_nextafter_avx_f32
+#define xfrfrexp nsimd_sleef_frfrexp_avx_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_avx_f32
+#define xexpfrexp nsimd_sleef_expfrexp_avx_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_avx_f32
+#define xfmod nsimd_sleef_fmod_avx_f64
+#define xfmodf nsimd_sleef_fmod_avx_f32
+#define xremainder nsimd_sleef_remainder_avx_f64
+#define xremainderf nsimd_sleef_remainder_avx_f32
+#define xmodf nsimd_sleef_modf_avx_f64
+#define xmodff nsimd_sleef_modf_avx_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10_avx_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10_avx_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10_avx_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10_avx_f32
+#define xerf_u1 nsimd_sleef_erf_u10_avx_f64
+#define xerff_u1 nsimd_sleef_erf_u10_avx_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15_avx_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15_avx_f32
+#define xgetInt nsimd_sleef_getInt_avx_f64
+#define xgetIntf nsimd_sleef_getInt_avx_f32
+#define xgetPtr nsimd_sleef_getPtr_avx_f64
+#define xgetPtrf nsimd_sleef_getPtr_avx_f32
+
+ #endif
+
+ #define rempi nsimd_sleef_rempi_avx
+ #define rempif nsimd_sleef_rempif_avx
+ #define rempisub nsimd_sleef_rempisub_avx
+ #define rempisubf nsimd_sleef_rempisubf_avx
+ #define gammak nsimd_gammak_avx
+ #define gammafk nsimd_gammafk_avx
+
+ #endif
+
+
+
+#endif
+
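The _u05/_u10/_u15/_u35 suffixes carried into the nsimd_sleef_* names encode SLEEF's guaranteed accuracy bound in tenths of a ULP (0.5, 1.0, 1.5 and 3.5 ULP respectively), and SLEEF's trailing f (as in xsinf_u1) becomes the _f32 suffix. A hedged AVX example; the prototype is an assumption for illustration:

#include <immintrin.h>

/* Assumed prototype: xsinf_u1, the 1.0-ULP single-precision sine,
   as renamed by renameavx.h. */
__m256 nsimd_sleef_sin_u10_avx_f32(__m256 x);

__m256 accurate_sinf8(__m256 x) {
  /* Eight f32 lanes computed per call on AVX. */
  return nsimd_sleef_sin_u10_avx_f32(x);
}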
diff --git a/src/renameavx2.h b/src/renameavx2.h
new file mode 100644
index 00000000..71ab9469
--- /dev/null
+++ b/src/renameavx2.h
@@ -0,0 +1,337 @@
+#ifndef RENAMEAVX2_H
+ #define RENAMEAVX2_H
+
+ /* ------------------------------------------------------------------------- */
+ /* Naming of functions avx2 */
+
+ #ifdef NSIMD_AVX2
+
+ #ifdef DETERMINISTIC
+
+ #define xsin nsimd_sleef_sin_u35d_avx2_f64
+#define xsinf nsimd_sleef_sin_u35d_avx2_f32
+#define xcos nsimd_sleef_cos_u35d_avx2_f64
+#define xcosf nsimd_sleef_cos_u35d_avx2_f32
+#define xsincos nsimd_sleef_sincos_u35d_avx2_f64
+#define xsincosf nsimd_sleef_sincos_u35d_avx2_f32
+#define xtan nsimd_sleef_tan_u35d_avx2_f64
+#define xtanf nsimd_sleef_tan_u35d_avx2_f32
+#define xasin nsimd_sleef_asin_u35d_avx2_f64
+#define xasinf nsimd_sleef_asin_u35d_avx2_f32
+#define xacos nsimd_sleef_acos_u35d_avx2_f64
+#define xacosf nsimd_sleef_acos_u35d_avx2_f32
+#define xatan nsimd_sleef_atan_u35d_avx2_f64
+#define xatanf nsimd_sleef_atan_u35d_avx2_f32
+#define xatan2 nsimd_sleef_atan2_u35d_avx2_f64
+#define xatan2f nsimd_sleef_atan2_u35d_avx2_f32
+#define xlog nsimd_sleef_log_u35d_avx2_f64
+#define xlogf nsimd_sleef_log_u35d_avx2_f32
+#define xcbrt nsimd_sleef_cbrt_u35d_avx2_f64
+#define xcbrtf nsimd_sleef_cbrt_u35d_avx2_f32
+#define xsin_u1 nsimd_sleef_sin_u10d_avx2_f64
+#define xsinf_u1 nsimd_sleef_sin_u10d_avx2_f32
+#define xcos_u1 nsimd_sleef_cos_u10d_avx2_f64
+#define xcosf_u1 nsimd_sleef_cos_u10d_avx2_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10d_avx2_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10d_avx2_f32
+#define xtan_u1 nsimd_sleef_tan_u10d_avx2_f64
+#define xtanf_u1 nsimd_sleef_tan_u10d_avx2_f32
+#define xasin_u1 nsimd_sleef_asin_u10d_avx2_f64
+#define xasinf_u1 nsimd_sleef_asin_u10d_avx2_f32
+#define xacos_u1 nsimd_sleef_acos_u10d_avx2_f64
+#define xacosf_u1 nsimd_sleef_acos_u10d_avx2_f32
+#define xatan_u1 nsimd_sleef_atan_u10d_avx2_f64
+#define xatanf_u1 nsimd_sleef_atan_u10d_avx2_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10d_avx2_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10d_avx2_f32
+#define xlog_u1 nsimd_sleef_log_u10d_avx2_f64
+#define xlogf_u1 nsimd_sleef_log_u10d_avx2_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10d_avx2_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10d_avx2_f32
+#define xexp nsimd_sleef_exp_u10d_avx2_f64
+#define xexpf nsimd_sleef_exp_u10d_avx2_f32
+#define xpow nsimd_sleef_pow_u10d_avx2_f64
+#define xpowf nsimd_sleef_pow_u10d_avx2_f32
+#define xsinh nsimd_sleef_sinh_u10d_avx2_f64
+#define xsinhf nsimd_sleef_sinh_u10d_avx2_f32
+#define xcosh nsimd_sleef_cosh_u10d_avx2_f64
+#define xcoshf nsimd_sleef_cosh_u10d_avx2_f32
+#define xtanh nsimd_sleef_tanh_u10d_avx2_f64
+#define xtanhf nsimd_sleef_tanh_u10d_avx2_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35d_avx2_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35d_avx2_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35d_avx2_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35d_avx2_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35d_avx2_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35d_avx2_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500d_avx2_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500d_avx2_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500d_avx2_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500d_avx2_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500d_avx2_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500d_avx2_f32
+#define xasinh nsimd_sleef_asinh_u10d_avx2_f64
+#define xasinhf nsimd_sleef_asinh_u10d_avx2_f32
+#define xacosh nsimd_sleef_acosh_u10d_avx2_f64
+#define xacoshf nsimd_sleef_acosh_u10d_avx2_f32
+#define xatanh nsimd_sleef_atanh_u10d_avx2_f64
+#define xatanhf nsimd_sleef_atanh_u10d_avx2_f32
+#define xexp2 nsimd_sleef_exp2_u10d_avx2_f64
+#define xexp2f nsimd_sleef_exp2_u10d_avx2_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35d_avx2_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35d_avx2_f32
+#define xexp10 nsimd_sleef_exp10_u10d_avx2_f64
+#define xexp10f nsimd_sleef_exp10_u10d_avx2_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35d_avx2_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35d_avx2_f32
+#define xexpm1 nsimd_sleef_expm1_u10d_avx2_f64
+#define xexpm1f nsimd_sleef_expm1_u10d_avx2_f32
+#define xlog10 nsimd_sleef_log10_u10d_avx2_f64
+#define xlog10f nsimd_sleef_log10_u10d_avx2_f32
+#define xlog2 nsimd_sleef_log2_u10d_avx2_f64
+#define xlog2f nsimd_sleef_log2_u10d_avx2_f32
+#define xlog2_u35 nsimd_sleef_log2_u35d_avx2_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35d_avx2_f32
+#define xlog1p nsimd_sleef_log1p_u10d_avx2_f64
+#define xlog1pf nsimd_sleef_log1p_u10d_avx2_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05d_avx2_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05d_avx2_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35d_avx2_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35d_avx2_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05d_avx2_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05d_avx2_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05d_avx2_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05d_avx2_f32
+#define xldexp nsimd_sleef_ldexp_avx2_f64
+#define xldexpf nsimd_sleef_ldexp_avx2_f32
+#define xilogb nsimd_sleef_ilogb_avx2_f64
+#define xilogbf nsimd_sleef_ilogb_avx2_f32
+#define xfma nsimd_sleef_fma_avx2_f64
+#define xfmaf nsimd_sleef_fma_avx2_f32
+#define xsqrt nsimd_sleef_sqrt_avx2_f64
+#define xsqrtf nsimd_sleef_sqrt_avx2_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05d_avx2_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05d_avx2_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35d_avx2_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35d_avx2_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05d_avx2_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05d_avx2_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35d_avx2_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35d_avx2_f32
+#define xfabs nsimd_sleef_fabs_avx2_f64
+#define xfabsf nsimd_sleef_fabs_avx2_f32
+#define xcopysign nsimd_sleef_copysign_avx2_f64
+#define xcopysignf nsimd_sleef_copysign_avx2_f32
+#define xfmax nsimd_sleef_fmax_avx2_f64
+#define xfmaxf nsimd_sleef_fmax_avx2_f32
+#define xfmin nsimd_sleef_fmin_avx2_f64
+#define xfminf nsimd_sleef_fmin_avx2_f32
+#define xfdim nsimd_sleef_fdim_avx2_f64
+#define xfdimf nsimd_sleef_fdim_avx2_f32
+#define xtrunc nsimd_sleef_trunc_avx2_f64
+#define xtruncf nsimd_sleef_trunc_avx2_f32
+#define xfloor nsimd_sleef_floor_avx2_f64
+#define xfloorf nsimd_sleef_floor_avx2_f32
+#define xceil nsimd_sleef_ceil_avx2_f64
+#define xceilf nsimd_sleef_ceil_avx2_f32
+#define xround nsimd_sleef_round_avx2_f64
+#define xroundf nsimd_sleef_round_avx2_f32
+#define xrint nsimd_sleef_rint_avx2_f64
+#define xrintf nsimd_sleef_rint_avx2_f32
+#define xnextafter nsimd_sleef_nextafter_avx2_f64
+#define xnextafterf nsimd_sleef_nextafter_avx2_f32
+#define xfrfrexp nsimd_sleef_frfrexp_avx2_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_avx2_f32
+#define xexpfrexp nsimd_sleef_expfrexp_avx2_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_avx2_f32
+#define xfmod nsimd_sleef_fmod_avx2_f64
+#define xfmodf nsimd_sleef_fmod_avx2_f32
+#define xremainder nsimd_sleef_remainder_avx2_f64
+#define xremainderf nsimd_sleef_remainder_avx2_f32
+#define xmodf nsimd_sleef_modf_avx2_f64
+#define xmodff nsimd_sleef_modf_avx2_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10d_avx2_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10d_avx2_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10d_avx2_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10d_avx2_f32
+#define xerf_u1 nsimd_sleef_erf_u10d_avx2_f64
+#define xerff_u1 nsimd_sleef_erf_u10d_avx2_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15d_avx2_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15d_avx2_f32
+#define xgetInt nsimd_sleef_getInt_avx2_f64
+#define xgetIntf nsimd_sleef_getInt_avx2_f32
+#define xgetPtr nsimd_sleef_getPtr_avx2_f64
+#define xgetPtrf nsimd_sleef_getPtr_avx2_f32
+
+ #else
+
+ #define xsin nsimd_sleef_sin_u35_avx2_f64
+#define xsinf nsimd_sleef_sin_u35_avx2_f32
+#define xcos nsimd_sleef_cos_u35_avx2_f64
+#define xcosf nsimd_sleef_cos_u35_avx2_f32
+#define xsincos nsimd_sleef_sincos_u35_avx2_f64
+#define xsincosf nsimd_sleef_sincos_u35_avx2_f32
+#define xtan nsimd_sleef_tan_u35_avx2_f64
+#define xtanf nsimd_sleef_tan_u35_avx2_f32
+#define xasin nsimd_sleef_asin_u35_avx2_f64
+#define xasinf nsimd_sleef_asin_u35_avx2_f32
+#define xacos nsimd_sleef_acos_u35_avx2_f64
+#define xacosf nsimd_sleef_acos_u35_avx2_f32
+#define xatan nsimd_sleef_atan_u35_avx2_f64
+#define xatanf nsimd_sleef_atan_u35_avx2_f32
+#define xatan2 nsimd_sleef_atan2_u35_avx2_f64
+#define xatan2f nsimd_sleef_atan2_u35_avx2_f32
+#define xlog nsimd_sleef_log_u35_avx2_f64
+#define xlogf nsimd_sleef_log_u35_avx2_f32
+#define xcbrt nsimd_sleef_cbrt_u35_avx2_f64
+#define xcbrtf nsimd_sleef_cbrt_u35_avx2_f32
+#define xsin_u1 nsimd_sleef_sin_u10_avx2_f64
+#define xsinf_u1 nsimd_sleef_sin_u10_avx2_f32
+#define xcos_u1 nsimd_sleef_cos_u10_avx2_f64
+#define xcosf_u1 nsimd_sleef_cos_u10_avx2_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10_avx2_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10_avx2_f32
+#define xtan_u1 nsimd_sleef_tan_u10_avx2_f64
+#define xtanf_u1 nsimd_sleef_tan_u10_avx2_f32
+#define xasin_u1 nsimd_sleef_asin_u10_avx2_f64
+#define xasinf_u1 nsimd_sleef_asin_u10_avx2_f32
+#define xacos_u1 nsimd_sleef_acos_u10_avx2_f64
+#define xacosf_u1 nsimd_sleef_acos_u10_avx2_f32
+#define xatan_u1 nsimd_sleef_atan_u10_avx2_f64
+#define xatanf_u1 nsimd_sleef_atan_u10_avx2_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10_avx2_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10_avx2_f32
+#define xlog_u1 nsimd_sleef_log_u10_avx2_f64
+#define xlogf_u1 nsimd_sleef_log_u10_avx2_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10_avx2_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10_avx2_f32
+#define xexp nsimd_sleef_exp_u10_avx2_f64
+#define xexpf nsimd_sleef_exp_u10_avx2_f32
+#define xpow nsimd_sleef_pow_u10_avx2_f64
+#define xpowf nsimd_sleef_pow_u10_avx2_f32
+#define xsinh nsimd_sleef_sinh_u10_avx2_f64
+#define xsinhf nsimd_sleef_sinh_u10_avx2_f32
+#define xcosh nsimd_sleef_cosh_u10_avx2_f64
+#define xcoshf nsimd_sleef_cosh_u10_avx2_f32
+#define xtanh nsimd_sleef_tanh_u10_avx2_f64
+#define xtanhf nsimd_sleef_tanh_u10_avx2_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35_avx2_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35_avx2_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35_avx2_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35_avx2_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35_avx2_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35_avx2_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500_avx2_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500_avx2_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500_avx2_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500_avx2_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500_avx2_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500_avx2_f32
+#define xasinh nsimd_sleef_asinh_u10_avx2_f64
+#define xasinhf nsimd_sleef_asinh_u10_avx2_f32
+#define xacosh nsimd_sleef_acosh_u10_avx2_f64
+#define xacoshf nsimd_sleef_acosh_u10_avx2_f32
+#define xatanh nsimd_sleef_atanh_u10_avx2_f64
+#define xatanhf nsimd_sleef_atanh_u10_avx2_f32
+#define xexp2 nsimd_sleef_exp2_u10_avx2_f64
+#define xexp2f nsimd_sleef_exp2_u10_avx2_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35_avx2_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35_avx2_f32
+#define xexp10 nsimd_sleef_exp10_u10_avx2_f64
+#define xexp10f nsimd_sleef_exp10_u10_avx2_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35_avx2_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35_avx2_f32
+#define xexpm1 nsimd_sleef_expm1_u10_avx2_f64
+#define xexpm1f nsimd_sleef_expm1_u10_avx2_f32
+#define xlog10 nsimd_sleef_log10_u10_avx2_f64
+#define xlog10f nsimd_sleef_log10_u10_avx2_f32
+#define xlog2 nsimd_sleef_log2_u10_avx2_f64
+#define xlog2f nsimd_sleef_log2_u10_avx2_f32
+#define xlog2_u35 nsimd_sleef_log2_u35_avx2_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35_avx2_f32
+#define xlog1p nsimd_sleef_log1p_u10_avx2_f64
+#define xlog1pf nsimd_sleef_log1p_u10_avx2_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05_avx2_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05_avx2_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35_avx2_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35_avx2_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05_avx2_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05_avx2_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05_avx2_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05_avx2_f32
+#define xldexp nsimd_sleef_ldexp_avx2_f64
+#define xldexpf nsimd_sleef_ldexp_avx2_f32
+#define xilogb nsimd_sleef_ilogb_avx2_f64
+#define xilogbf nsimd_sleef_ilogb_avx2_f32
+#define xfma nsimd_sleef_fma_avx2_f64
+#define xfmaf nsimd_sleef_fma_avx2_f32
+#define xsqrt nsimd_sleef_sqrt_avx2_f64
+#define xsqrtf nsimd_sleef_sqrt_avx2_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05_avx2_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05_avx2_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35_avx2_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35_avx2_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05_avx2_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05_avx2_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35_avx2_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35_avx2_f32
+#define xfabs nsimd_sleef_fabs_avx2_f64
+#define xfabsf nsimd_sleef_fabs_avx2_f32
+#define xcopysign nsimd_sleef_copysign_avx2_f64
+#define xcopysignf nsimd_sleef_copysign_avx2_f32
+#define xfmax nsimd_sleef_fmax_avx2_f64
+#define xfmaxf nsimd_sleef_fmax_avx2_f32
+#define xfmin nsimd_sleef_fmin_avx2_f64
+#define xfminf nsimd_sleef_fmin_avx2_f32
+#define xfdim nsimd_sleef_fdim_avx2_f64
+#define xfdimf nsimd_sleef_fdim_avx2_f32
+#define xtrunc nsimd_sleef_trunc_avx2_f64
+#define xtruncf nsimd_sleef_trunc_avx2_f32
+#define xfloor nsimd_sleef_floor_avx2_f64
+#define xfloorf nsimd_sleef_floor_avx2_f32
+#define xceil nsimd_sleef_ceil_avx2_f64
+#define xceilf nsimd_sleef_ceil_avx2_f32
+#define xround nsimd_sleef_round_avx2_f64
+#define xroundf nsimd_sleef_round_avx2_f32
+#define xrint nsimd_sleef_rint_avx2_f64
+#define xrintf nsimd_sleef_rint_avx2_f32
+#define xnextafter nsimd_sleef_nextafter_avx2_f64
+#define xnextafterf nsimd_sleef_nextafter_avx2_f32
+#define xfrfrexp nsimd_sleef_frfrexp_avx2_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_avx2_f32
+#define xexpfrexp nsimd_sleef_expfrexp_avx2_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_avx2_f32
+#define xfmod nsimd_sleef_fmod_avx2_f64
+#define xfmodf nsimd_sleef_fmod_avx2_f32
+#define xremainder nsimd_sleef_remainder_avx2_f64
+#define xremainderf nsimd_sleef_remainder_avx2_f32
+#define xmodf nsimd_sleef_modf_avx2_f64
+#define xmodff nsimd_sleef_modf_avx2_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10_avx2_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10_avx2_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10_avx2_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10_avx2_f32
+#define xerf_u1 nsimd_sleef_erf_u10_avx2_f64
+#define xerff_u1 nsimd_sleef_erf_u10_avx2_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15_avx2_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15_avx2_f32
+#define xgetInt nsimd_sleef_getInt_avx2_f64
+#define xgetIntf nsimd_sleef_getInt_avx2_f32
+#define xgetPtr nsimd_sleef_getPtr_avx2_f64
+#define xgetPtrf nsimd_sleef_getPtr_avx2_f32
+
+ #endif
+
+ #define rempi nsimd_sleef_rempi_avx2
+ #define rempif nsimd_sleef_rempif_avx2
+ #define rempisub nsimd_sleef_rempisub_avx2
+ #define rempisubf nsimd_sleef_rempisubf_avx2
+ #define gammak nsimd_gammak_avx2
+ #define gammafk nsimd_gammafk_avx2
+
+ #endif
+
+
+
+#endif
+
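Compiling with -DDETERMINISTIC reroutes every macro to the d-suffixed variants (u35d, u10d, ...), SLEEF's deterministic implementations, while helpers with no accuracy suffix (xldexp, xfma, xsqrt, ...) resolve to the same symbol in both branches. Illustrative compile lines, assumed to mirror the sleef_cflags set earlier in this patch:

/* cc -DNSIMD_AVX2 -DDORENAME=1 ...                 -> xsin == nsimd_sleef_sin_u35_avx2_f64  */
/* cc -DNSIMD_AVX2 -DDORENAME=1 -DDETERMINISTIC ... -> xsin == nsimd_sleef_sin_u35d_avx2_f64 */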
diff --git a/src/renameavx512f.h b/src/renameavx512f.h
new file mode 100644
index 00000000..3513ef48
--- /dev/null
+++ b/src/renameavx512f.h
@@ -0,0 +1,667 @@
+#ifndef RENAMEAVX512F_H
+ #define RENAMEAVX512F_H
+
+ /* ------------------------------------------------------------------------- */
+ /* Naming of functions avx512_knl */
+
+ #ifdef NSIMD_AVX512_KNL
+
+ #ifdef DETERMINISTIC
+
+ #define xsin nsimd_sleef_sin_u35d_avx512_knl_f64
+#define xsinf nsimd_sleef_sin_u35d_avx512_knl_f32
+#define xcos nsimd_sleef_cos_u35d_avx512_knl_f64
+#define xcosf nsimd_sleef_cos_u35d_avx512_knl_f32
+#define xsincos nsimd_sleef_sincos_u35d_avx512_knl_f64
+#define xsincosf nsimd_sleef_sincos_u35d_avx512_knl_f32
+#define xtan nsimd_sleef_tan_u35d_avx512_knl_f64
+#define xtanf nsimd_sleef_tan_u35d_avx512_knl_f32
+#define xasin nsimd_sleef_asin_u35d_avx512_knl_f64
+#define xasinf nsimd_sleef_asin_u35d_avx512_knl_f32
+#define xacos nsimd_sleef_acos_u35d_avx512_knl_f64
+#define xacosf nsimd_sleef_acos_u35d_avx512_knl_f32
+#define xatan nsimd_sleef_atan_u35d_avx512_knl_f64
+#define xatanf nsimd_sleef_atan_u35d_avx512_knl_f32
+#define xatan2 nsimd_sleef_atan2_u35d_avx512_knl_f64
+#define xatan2f nsimd_sleef_atan2_u35d_avx512_knl_f32
+#define xlog nsimd_sleef_log_u35d_avx512_knl_f64
+#define xlogf nsimd_sleef_log_u35d_avx512_knl_f32
+#define xcbrt nsimd_sleef_cbrt_u35d_avx512_knl_f64
+#define xcbrtf nsimd_sleef_cbrt_u35d_avx512_knl_f32
+#define xsin_u1 nsimd_sleef_sin_u10d_avx512_knl_f64
+#define xsinf_u1 nsimd_sleef_sin_u10d_avx512_knl_f32
+#define xcos_u1 nsimd_sleef_cos_u10d_avx512_knl_f64
+#define xcosf_u1 nsimd_sleef_cos_u10d_avx512_knl_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10d_avx512_knl_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10d_avx512_knl_f32
+#define xtan_u1 nsimd_sleef_tan_u10d_avx512_knl_f64
+#define xtanf_u1 nsimd_sleef_tan_u10d_avx512_knl_f32
+#define xasin_u1 nsimd_sleef_asin_u10d_avx512_knl_f64
+#define xasinf_u1 nsimd_sleef_asin_u10d_avx512_knl_f32
+#define xacos_u1 nsimd_sleef_acos_u10d_avx512_knl_f64
+#define xacosf_u1 nsimd_sleef_acos_u10d_avx512_knl_f32
+#define xatan_u1 nsimd_sleef_atan_u10d_avx512_knl_f64
+#define xatanf_u1 nsimd_sleef_atan_u10d_avx512_knl_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10d_avx512_knl_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10d_avx512_knl_f32
+#define xlog_u1 nsimd_sleef_log_u10d_avx512_knl_f64
+#define xlogf_u1 nsimd_sleef_log_u10d_avx512_knl_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10d_avx512_knl_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10d_avx512_knl_f32
+#define xexp nsimd_sleef_exp_u10d_avx512_knl_f64
+#define xexpf nsimd_sleef_exp_u10d_avx512_knl_f32
+#define xpow nsimd_sleef_pow_u10d_avx512_knl_f64
+#define xpowf nsimd_sleef_pow_u10d_avx512_knl_f32
+#define xsinh nsimd_sleef_sinh_u10d_avx512_knl_f64
+#define xsinhf nsimd_sleef_sinh_u10d_avx512_knl_f32
+#define xcosh nsimd_sleef_cosh_u10d_avx512_knl_f64
+#define xcoshf nsimd_sleef_cosh_u10d_avx512_knl_f32
+#define xtanh nsimd_sleef_tanh_u10d_avx512_knl_f64
+#define xtanhf nsimd_sleef_tanh_u10d_avx512_knl_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35d_avx512_knl_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35d_avx512_knl_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35d_avx512_knl_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35d_avx512_knl_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35d_avx512_knl_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35d_avx512_knl_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500d_avx512_knl_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500d_avx512_knl_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500d_avx512_knl_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500d_avx512_knl_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500d_avx512_knl_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500d_avx512_knl_f32
+#define xasinh nsimd_sleef_asinh_u10d_avx512_knl_f64
+#define xasinhf nsimd_sleef_asinh_u10d_avx512_knl_f32
+#define xacosh nsimd_sleef_acosh_u10d_avx512_knl_f64
+#define xacoshf nsimd_sleef_acosh_u10d_avx512_knl_f32
+#define xatanh nsimd_sleef_atanh_u10d_avx512_knl_f64
+#define xatanhf nsimd_sleef_atanh_u10d_avx512_knl_f32
+#define xexp2 nsimd_sleef_exp2_u10d_avx512_knl_f64
+#define xexp2f nsimd_sleef_exp2_u10d_avx512_knl_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35d_avx512_knl_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35d_avx512_knl_f32
+#define xexp10 nsimd_sleef_exp10_u10d_avx512_knl_f64
+#define xexp10f nsimd_sleef_exp10_u10d_avx512_knl_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35d_avx512_knl_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35d_avx512_knl_f32
+#define xexpm1 nsimd_sleef_expm1_u10d_avx512_knl_f64
+#define xexpm1f nsimd_sleef_expm1_u10d_avx512_knl_f32
+#define xlog10 nsimd_sleef_log10_u10d_avx512_knl_f64
+#define xlog10f nsimd_sleef_log10_u10d_avx512_knl_f32
+#define xlog2 nsimd_sleef_log2_u10d_avx512_knl_f64
+#define xlog2f nsimd_sleef_log2_u10d_avx512_knl_f32
+#define xlog2_u35 nsimd_sleef_log2_u35d_avx512_knl_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35d_avx512_knl_f32
+#define xlog1p nsimd_sleef_log1p_u10d_avx512_knl_f64
+#define xlog1pf nsimd_sleef_log1p_u10d_avx512_knl_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05d_avx512_knl_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05d_avx512_knl_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35d_avx512_knl_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35d_avx512_knl_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05d_avx512_knl_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05d_avx512_knl_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05d_avx512_knl_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05d_avx512_knl_f32
+#define xldexp nsimd_sleef_ldexp_avx512_knl_f64
+#define xldexpf nsimd_sleef_ldexp_avx512_knl_f32
+#define xilogb nsimd_sleef_ilogb_avx512_knl_f64
+#define xilogbf nsimd_sleef_ilogb_avx512_knl_f32
+#define xfma nsimd_sleef_fma_avx512_knl_f64
+#define xfmaf nsimd_sleef_fma_avx512_knl_f32
+#define xsqrt nsimd_sleef_sqrt_avx512_knl_f64
+#define xsqrtf nsimd_sleef_sqrt_avx512_knl_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05d_avx512_knl_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05d_avx512_knl_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35d_avx512_knl_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35d_avx512_knl_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05d_avx512_knl_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05d_avx512_knl_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35d_avx512_knl_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35d_avx512_knl_f32
+#define xfabs nsimd_sleef_fabs_avx512_knl_f64
+#define xfabsf nsimd_sleef_fabs_avx512_knl_f32
+#define xcopysign nsimd_sleef_copysign_avx512_knl_f64
+#define xcopysignf nsimd_sleef_copysign_avx512_knl_f32
+#define xfmax nsimd_sleef_fmax_avx512_knl_f64
+#define xfmaxf nsimd_sleef_fmax_avx512_knl_f32
+#define xfmin nsimd_sleef_fmin_avx512_knl_f64
+#define xfminf nsimd_sleef_fmin_avx512_knl_f32
+#define xfdim nsimd_sleef_fdim_avx512_knl_f64
+#define xfdimf nsimd_sleef_fdim_avx512_knl_f32
+#define xtrunc nsimd_sleef_trunc_avx512_knl_f64
+#define xtruncf nsimd_sleef_trunc_avx512_knl_f32
+#define xfloor nsimd_sleef_floor_avx512_knl_f64
+#define xfloorf nsimd_sleef_floor_avx512_knl_f32
+#define xceil nsimd_sleef_ceil_avx512_knl_f64
+#define xceilf nsimd_sleef_ceil_avx512_knl_f32
+#define xround nsimd_sleef_round_avx512_knl_f64
+#define xroundf nsimd_sleef_round_avx512_knl_f32
+#define xrint nsimd_sleef_rint_avx512_knl_f64
+#define xrintf nsimd_sleef_rint_avx512_knl_f32
+#define xnextafter nsimd_sleef_nextafter_avx512_knl_f64
+#define xnextafterf nsimd_sleef_nextafter_avx512_knl_f32
+#define xfrfrexp nsimd_sleef_frfrexp_avx512_knl_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_avx512_knl_f32
+#define xexpfrexp nsimd_sleef_expfrexp_avx512_knl_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_avx512_knl_f32
+#define xfmod nsimd_sleef_fmod_avx512_knl_f64
+#define xfmodf nsimd_sleef_fmod_avx512_knl_f32
+#define xremainder nsimd_sleef_remainder_avx512_knl_f64
+#define xremainderf nsimd_sleef_remainder_avx512_knl_f32
+#define xmodf nsimd_sleef_modf_avx512_knl_f64
+#define xmodff nsimd_sleef_modf_avx512_knl_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10d_avx512_knl_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10d_avx512_knl_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10d_avx512_knl_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10d_avx512_knl_f32
+#define xerf_u1 nsimd_sleef_erf_u10d_avx512_knl_f64
+#define xerff_u1 nsimd_sleef_erf_u10d_avx512_knl_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15d_avx512_knl_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15d_avx512_knl_f32
+#define xgetInt nsimd_sleef_getInt_avx512_knl_f64
+#define xgetIntf nsimd_sleef_getInt_avx512_knl_f32
+#define xgetPtr nsimd_sleef_getPtr_avx512_knl_f64
+#define xgetPtrf nsimd_sleef_getPtr_avx512_knl_f32
+
+ #else
+
+ #define xsin nsimd_sleef_sin_u35_avx512_knl_f64
+#define xsinf nsimd_sleef_sin_u35_avx512_knl_f32
+#define xcos nsimd_sleef_cos_u35_avx512_knl_f64
+#define xcosf nsimd_sleef_cos_u35_avx512_knl_f32
+#define xsincos nsimd_sleef_sincos_u35_avx512_knl_f64
+#define xsincosf nsimd_sleef_sincos_u35_avx512_knl_f32
+#define xtan nsimd_sleef_tan_u35_avx512_knl_f64
+#define xtanf nsimd_sleef_tan_u35_avx512_knl_f32
+#define xasin nsimd_sleef_asin_u35_avx512_knl_f64
+#define xasinf nsimd_sleef_asin_u35_avx512_knl_f32
+#define xacos nsimd_sleef_acos_u35_avx512_knl_f64
+#define xacosf nsimd_sleef_acos_u35_avx512_knl_f32
+#define xatan nsimd_sleef_atan_u35_avx512_knl_f64
+#define xatanf nsimd_sleef_atan_u35_avx512_knl_f32
+#define xatan2 nsimd_sleef_atan2_u35_avx512_knl_f64
+#define xatan2f nsimd_sleef_atan2_u35_avx512_knl_f32
+#define xlog nsimd_sleef_log_u35_avx512_knl_f64
+#define xlogf nsimd_sleef_log_u35_avx512_knl_f32
+#define xcbrt nsimd_sleef_cbrt_u35_avx512_knl_f64
+#define xcbrtf nsimd_sleef_cbrt_u35_avx512_knl_f32
+#define xsin_u1 nsimd_sleef_sin_u10_avx512_knl_f64
+#define xsinf_u1 nsimd_sleef_sin_u10_avx512_knl_f32
+#define xcos_u1 nsimd_sleef_cos_u10_avx512_knl_f64
+#define xcosf_u1 nsimd_sleef_cos_u10_avx512_knl_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10_avx512_knl_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10_avx512_knl_f32
+#define xtan_u1 nsimd_sleef_tan_u10_avx512_knl_f64
+#define xtanf_u1 nsimd_sleef_tan_u10_avx512_knl_f32
+#define xasin_u1 nsimd_sleef_asin_u10_avx512_knl_f64
+#define xasinf_u1 nsimd_sleef_asin_u10_avx512_knl_f32
+#define xacos_u1 nsimd_sleef_acos_u10_avx512_knl_f64
+#define xacosf_u1 nsimd_sleef_acos_u10_avx512_knl_f32
+#define xatan_u1 nsimd_sleef_atan_u10_avx512_knl_f64
+#define xatanf_u1 nsimd_sleef_atan_u10_avx512_knl_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10_avx512_knl_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10_avx512_knl_f32
+#define xlog_u1 nsimd_sleef_log_u10_avx512_knl_f64
+#define xlogf_u1 nsimd_sleef_log_u10_avx512_knl_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10_avx512_knl_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10_avx512_knl_f32
+#define xexp nsimd_sleef_exp_u10_avx512_knl_f64
+#define xexpf nsimd_sleef_exp_u10_avx512_knl_f32
+#define xpow nsimd_sleef_pow_u10_avx512_knl_f64
+#define xpowf nsimd_sleef_pow_u10_avx512_knl_f32
+#define xsinh nsimd_sleef_sinh_u10_avx512_knl_f64
+#define xsinhf nsimd_sleef_sinh_u10_avx512_knl_f32
+#define xcosh nsimd_sleef_cosh_u10_avx512_knl_f64
+#define xcoshf nsimd_sleef_cosh_u10_avx512_knl_f32
+#define xtanh nsimd_sleef_tanh_u10_avx512_knl_f64
+#define xtanhf nsimd_sleef_tanh_u10_avx512_knl_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35_avx512_knl_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35_avx512_knl_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35_avx512_knl_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35_avx512_knl_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35_avx512_knl_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35_avx512_knl_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500_avx512_knl_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500_avx512_knl_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500_avx512_knl_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500_avx512_knl_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500_avx512_knl_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500_avx512_knl_f32
+#define xasinh nsimd_sleef_asinh_u10_avx512_knl_f64
+#define xasinhf nsimd_sleef_asinh_u10_avx512_knl_f32
+#define xacosh nsimd_sleef_acosh_u10_avx512_knl_f64
+#define xacoshf nsimd_sleef_acosh_u10_avx512_knl_f32
+#define xatanh nsimd_sleef_atanh_u10_avx512_knl_f64
+#define xatanhf nsimd_sleef_atanh_u10_avx512_knl_f32
+#define xexp2 nsimd_sleef_exp2_u10_avx512_knl_f64
+#define xexp2f nsimd_sleef_exp2_u10_avx512_knl_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35_avx512_knl_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35_avx512_knl_f32
+#define xexp10 nsimd_sleef_exp10_u10_avx512_knl_f64
+#define xexp10f nsimd_sleef_exp10_u10_avx512_knl_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35_avx512_knl_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35_avx512_knl_f32
+#define xexpm1 nsimd_sleef_expm1_u10_avx512_knl_f64
+#define xexpm1f nsimd_sleef_expm1_u10_avx512_knl_f32
+#define xlog10 nsimd_sleef_log10_u10_avx512_knl_f64
+#define xlog10f nsimd_sleef_log10_u10_avx512_knl_f32
+#define xlog2 nsimd_sleef_log2_u10_avx512_knl_f64
+#define xlog2f nsimd_sleef_log2_u10_avx512_knl_f32
+#define xlog2_u35 nsimd_sleef_log2_u35_avx512_knl_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35_avx512_knl_f32
+#define xlog1p nsimd_sleef_log1p_u10_avx512_knl_f64
+#define xlog1pf nsimd_sleef_log1p_u10_avx512_knl_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05_avx512_knl_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05_avx512_knl_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35_avx512_knl_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35_avx512_knl_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05_avx512_knl_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05_avx512_knl_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05_avx512_knl_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05_avx512_knl_f32
+#define xldexp nsimd_sleef_ldexp_avx512_knl_f64
+#define xldexpf nsimd_sleef_ldexp_avx512_knl_f32
+#define xilogb nsimd_sleef_ilogb_avx512_knl_f64
+#define xilogbf nsimd_sleef_ilogb_avx512_knl_f32
+#define xfma nsimd_sleef_fma_avx512_knl_f64
+#define xfmaf nsimd_sleef_fma_avx512_knl_f32
+#define xsqrt nsimd_sleef_sqrt_avx512_knl_f64
+#define xsqrtf nsimd_sleef_sqrt_avx512_knl_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05_avx512_knl_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05_avx512_knl_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35_avx512_knl_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35_avx512_knl_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05_avx512_knl_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05_avx512_knl_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35_avx512_knl_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35_avx512_knl_f32
+#define xfabs nsimd_sleef_fabs_avx512_knl_f64
+#define xfabsf nsimd_sleef_fabs_avx512_knl_f32
+#define xcopysign nsimd_sleef_copysign_avx512_knl_f64
+#define xcopysignf nsimd_sleef_copysign_avx512_knl_f32
+#define xfmax nsimd_sleef_fmax_avx512_knl_f64
+#define xfmaxf nsimd_sleef_fmax_avx512_knl_f32
+#define xfmin nsimd_sleef_fmin_avx512_knl_f64
+#define xfminf nsimd_sleef_fmin_avx512_knl_f32
+#define xfdim nsimd_sleef_fdim_avx512_knl_f64
+#define xfdimf nsimd_sleef_fdim_avx512_knl_f32
+#define xtrunc nsimd_sleef_trunc_avx512_knl_f64
+#define xtruncf nsimd_sleef_trunc_avx512_knl_f32
+#define xfloor nsimd_sleef_floor_avx512_knl_f64
+#define xfloorf nsimd_sleef_floor_avx512_knl_f32
+#define xceil nsimd_sleef_ceil_avx512_knl_f64
+#define xceilf nsimd_sleef_ceil_avx512_knl_f32
+#define xround nsimd_sleef_round_avx512_knl_f64
+#define xroundf nsimd_sleef_round_avx512_knl_f32
+#define xrint nsimd_sleef_rint_avx512_knl_f64
+#define xrintf nsimd_sleef_rint_avx512_knl_f32
+#define xnextafter nsimd_sleef_nextafter_avx512_knl_f64
+#define xnextafterf nsimd_sleef_nextafter_avx512_knl_f32
+#define xfrfrexp nsimd_sleef_frfrexp_avx512_knl_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_avx512_knl_f32
+#define xexpfrexp nsimd_sleef_expfrexp_avx512_knl_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_avx512_knl_f32
+#define xfmod nsimd_sleef_fmod_avx512_knl_f64
+#define xfmodf nsimd_sleef_fmod_avx512_knl_f32
+#define xremainder nsimd_sleef_remainder_avx512_knl_f64
+#define xremainderf nsimd_sleef_remainder_avx512_knl_f32
+#define xmodf nsimd_sleef_modf_avx512_knl_f64
+#define xmodff nsimd_sleef_modf_avx512_knl_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10_avx512_knl_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10_avx512_knl_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10_avx512_knl_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10_avx512_knl_f32
+#define xerf_u1 nsimd_sleef_erf_u10_avx512_knl_f64
+#define xerff_u1 nsimd_sleef_erf_u10_avx512_knl_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15_avx512_knl_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15_avx512_knl_f32
+#define xgetInt nsimd_sleef_getInt_avx512_knl_f64
+#define xgetIntf nsimd_sleef_getInt_avx512_knl_f32
+#define xgetPtr nsimd_sleef_getPtr_avx512_knl_f64
+#define xgetPtrf nsimd_sleef_getPtr_avx512_knl_f32
+
+ #endif
+
+ #define rempi nsimd_sleef_rempi_avx512_knl
+ #define rempif nsimd_sleef_rempif_avx512_knl
+ #define rempisub nsimd_sleef_rempisub_avx512_knl
+ #define rempisubf nsimd_sleef_rempisubf_avx512_knl
+ #define gammak nsimd_gammak_avx512_knl
+ #define gammafk nsimd_gammafk_avx512_knl
+
+ #endif
+
+ /* ------------------------------------------------------------------------- */
+ /* Naming of functions avx512_skylake */
+
+ #ifdef NSIMD_AVX512_SKYLAKE
+
+ #ifdef DETERMINISTIC
+
+ #define xsin nsimd_sleef_sin_u35d_avx512_skylake_f64
+#define xsinf nsimd_sleef_sin_u35d_avx512_skylake_f32
+#define xcos nsimd_sleef_cos_u35d_avx512_skylake_f64
+#define xcosf nsimd_sleef_cos_u35d_avx512_skylake_f32
+#define xsincos nsimd_sleef_sincos_u35d_avx512_skylake_f64
+#define xsincosf nsimd_sleef_sincos_u35d_avx512_skylake_f32
+#define xtan nsimd_sleef_tan_u35d_avx512_skylake_f64
+#define xtanf nsimd_sleef_tan_u35d_avx512_skylake_f32
+#define xasin nsimd_sleef_asin_u35d_avx512_skylake_f64
+#define xasinf nsimd_sleef_asin_u35d_avx512_skylake_f32
+#define xacos nsimd_sleef_acos_u35d_avx512_skylake_f64
+#define xacosf nsimd_sleef_acos_u35d_avx512_skylake_f32
+#define xatan nsimd_sleef_atan_u35d_avx512_skylake_f64
+#define xatanf nsimd_sleef_atan_u35d_avx512_skylake_f32
+#define xatan2 nsimd_sleef_atan2_u35d_avx512_skylake_f64
+#define xatan2f nsimd_sleef_atan2_u35d_avx512_skylake_f32
+#define xlog nsimd_sleef_log_u35d_avx512_skylake_f64
+#define xlogf nsimd_sleef_log_u35d_avx512_skylake_f32
+#define xcbrt nsimd_sleef_cbrt_u35d_avx512_skylake_f64
+#define xcbrtf nsimd_sleef_cbrt_u35d_avx512_skylake_f32
+#define xsin_u1 nsimd_sleef_sin_u10d_avx512_skylake_f64
+#define xsinf_u1 nsimd_sleef_sin_u10d_avx512_skylake_f32
+#define xcos_u1 nsimd_sleef_cos_u10d_avx512_skylake_f64
+#define xcosf_u1 nsimd_sleef_cos_u10d_avx512_skylake_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10d_avx512_skylake_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10d_avx512_skylake_f32
+#define xtan_u1 nsimd_sleef_tan_u10d_avx512_skylake_f64
+#define xtanf_u1 nsimd_sleef_tan_u10d_avx512_skylake_f32
+#define xasin_u1 nsimd_sleef_asin_u10d_avx512_skylake_f64
+#define xasinf_u1 nsimd_sleef_asin_u10d_avx512_skylake_f32
+#define xacos_u1 nsimd_sleef_acos_u10d_avx512_skylake_f64
+#define xacosf_u1 nsimd_sleef_acos_u10d_avx512_skylake_f32
+#define xatan_u1 nsimd_sleef_atan_u10d_avx512_skylake_f64
+#define xatanf_u1 nsimd_sleef_atan_u10d_avx512_skylake_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10d_avx512_skylake_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10d_avx512_skylake_f32
+#define xlog_u1 nsimd_sleef_log_u10d_avx512_skylake_f64
+#define xlogf_u1 nsimd_sleef_log_u10d_avx512_skylake_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10d_avx512_skylake_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10d_avx512_skylake_f32
+#define xexp nsimd_sleef_exp_u10d_avx512_skylake_f64
+#define xexpf nsimd_sleef_exp_u10d_avx512_skylake_f32
+#define xpow nsimd_sleef_pow_u10d_avx512_skylake_f64
+#define xpowf nsimd_sleef_pow_u10d_avx512_skylake_f32
+#define xsinh nsimd_sleef_sinh_u10d_avx512_skylake_f64
+#define xsinhf nsimd_sleef_sinh_u10d_avx512_skylake_f32
+#define xcosh nsimd_sleef_cosh_u10d_avx512_skylake_f64
+#define xcoshf nsimd_sleef_cosh_u10d_avx512_skylake_f32
+#define xtanh nsimd_sleef_tanh_u10d_avx512_skylake_f64
+#define xtanhf nsimd_sleef_tanh_u10d_avx512_skylake_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35d_avx512_skylake_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35d_avx512_skylake_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35d_avx512_skylake_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35d_avx512_skylake_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35d_avx512_skylake_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35d_avx512_skylake_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500d_avx512_skylake_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500d_avx512_skylake_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500d_avx512_skylake_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500d_avx512_skylake_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500d_avx512_skylake_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500d_avx512_skylake_f32
+#define xasinh nsimd_sleef_asinh_u10d_avx512_skylake_f64
+#define xasinhf nsimd_sleef_asinh_u10d_avx512_skylake_f32
+#define xacosh nsimd_sleef_acosh_u10d_avx512_skylake_f64
+#define xacoshf nsimd_sleef_acosh_u10d_avx512_skylake_f32
+#define xatanh nsimd_sleef_atanh_u10d_avx512_skylake_f64
+#define xatanhf nsimd_sleef_atanh_u10d_avx512_skylake_f32
+#define xexp2 nsimd_sleef_exp2_u10d_avx512_skylake_f64
+#define xexp2f nsimd_sleef_exp2_u10d_avx512_skylake_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35d_avx512_skylake_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35d_avx512_skylake_f32
+#define xexp10 nsimd_sleef_exp10_u10d_avx512_skylake_f64
+#define xexp10f nsimd_sleef_exp10_u10d_avx512_skylake_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35d_avx512_skylake_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35d_avx512_skylake_f32
+#define xexpm1 nsimd_sleef_expm1_u10d_avx512_skylake_f64
+#define xexpm1f nsimd_sleef_expm1_u10d_avx512_skylake_f32
+#define xlog10 nsimd_sleef_log10_u10d_avx512_skylake_f64
+#define xlog10f nsimd_sleef_log10_u10d_avx512_skylake_f32
+#define xlog2 nsimd_sleef_log2_u10d_avx512_skylake_f64
+#define xlog2f nsimd_sleef_log2_u10d_avx512_skylake_f32
+#define xlog2_u35 nsimd_sleef_log2_u35d_avx512_skylake_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35d_avx512_skylake_f32
+#define xlog1p nsimd_sleef_log1p_u10d_avx512_skylake_f64
+#define xlog1pf nsimd_sleef_log1p_u10d_avx512_skylake_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05d_avx512_skylake_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05d_avx512_skylake_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35d_avx512_skylake_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35d_avx512_skylake_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05d_avx512_skylake_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05d_avx512_skylake_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05d_avx512_skylake_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05d_avx512_skylake_f32
+#define xldexp nsimd_sleef_ldexp_avx512_skylake_f64
+#define xldexpf nsimd_sleef_ldexp_avx512_skylake_f32
+#define xilogb nsimd_sleef_ilogb_avx512_skylake_f64
+#define xilogbf nsimd_sleef_ilogb_avx512_skylake_f32
+#define xfma nsimd_sleef_fma_avx512_skylake_f64
+#define xfmaf nsimd_sleef_fma_avx512_skylake_f32
+#define xsqrt nsimd_sleef_sqrt_avx512_skylake_f64
+#define xsqrtf nsimd_sleef_sqrt_avx512_skylake_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05d_avx512_skylake_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05d_avx512_skylake_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35d_avx512_skylake_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35d_avx512_skylake_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05d_avx512_skylake_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05d_avx512_skylake_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35d_avx512_skylake_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35d_avx512_skylake_f32
+#define xfabs nsimd_sleef_fabs_avx512_skylake_f64
+#define xfabsf nsimd_sleef_fabs_avx512_skylake_f32
+#define xcopysign nsimd_sleef_copysign_avx512_skylake_f64
+#define xcopysignf nsimd_sleef_copysign_avx512_skylake_f32
+#define xfmax nsimd_sleef_fmax_avx512_skylake_f64
+#define xfmaxf nsimd_sleef_fmax_avx512_skylake_f32
+#define xfmin nsimd_sleef_fmin_avx512_skylake_f64
+#define xfminf nsimd_sleef_fmin_avx512_skylake_f32
+#define xfdim nsimd_sleef_fdim_avx512_skylake_f64
+#define xfdimf nsimd_sleef_fdim_avx512_skylake_f32
+#define xtrunc nsimd_sleef_trunc_avx512_skylake_f64
+#define xtruncf nsimd_sleef_trunc_avx512_skylake_f32
+#define xfloor nsimd_sleef_floor_avx512_skylake_f64
+#define xfloorf
nsimd_sleef_floor_avx512_skylake_f32 +#define xceil nsimd_sleef_ceil_avx512_skylake_f64 +#define xceilf nsimd_sleef_ceil_avx512_skylake_f32 +#define xround nsimd_sleef_round_avx512_skylake_f64 +#define xroundf nsimd_sleef_round_avx512_skylake_f32 +#define xrint nsimd_sleef_rint_avx512_skylake_f64 +#define xrintf nsimd_sleef_rint_avx512_skylake_f32 +#define xnextafter nsimd_sleef_nextafter_avx512_skylake_f64 +#define xnextafterf nsimd_sleef_nextafter_avx512_skylake_f32 +#define xfrfrexp nsimd_sleef_frfrexp_avx512_skylake_f64 +#define xfrfrexpf nsimd_sleef_frfrexp_avx512_skylake_f32 +#define xexpfrexp nsimd_sleef_expfrexp_avx512_skylake_f64 +#define xexpfrexpf nsimd_sleef_expfrexp_avx512_skylake_f32 +#define xfmod nsimd_sleef_fmod_avx512_skylake_f64 +#define xfmodf nsimd_sleef_fmod_avx512_skylake_f32 +#define xremainder nsimd_sleef_remainder_avx512_skylake_f64 +#define xremainderf nsimd_sleef_remainder_avx512_skylake_f32 +#define xmodf nsimd_sleef_modf_avx512_skylake_f64 +#define xmodff nsimd_sleef_modf_avx512_skylake_f32 +#define xlgamma_u1 nsimd_sleef_lgamma_u10d_avx512_skylake_f64 +#define xlgammaf_u1 nsimd_sleef_lgamma_u10d_avx512_skylake_f32 +#define xtgamma_u1 nsimd_sleef_tgamma_u10d_avx512_skylake_f64 +#define xtgammaf_u1 nsimd_sleef_tgamma_u10d_avx512_skylake_f32 +#define xerf_u1 nsimd_sleef_erf_u10d_avx512_skylake_f64 +#define xerff_u1 nsimd_sleef_erf_u10d_avx512_skylake_f32 +#define xerfc_u15 nsimd_sleef_erfc_u15d_avx512_skylake_f64 +#define xerfcf_u15 nsimd_sleef_erfc_u15d_avx512_skylake_f32 +#define xgetInt nsimd_sleef_getInt_avx512_skylake_f64 +#define xgetIntf nsimd_sleef_getInt_avx512_skylake_f32 +#define xgetPtr nsimd_sleef_getPtr_avx512_skylake_f64 +#define xgetPtrf nsimd_sleef_getPtr_avx512_skylake_f32 + + #else + + #define xsin nsimd_sleef_sin_u35_avx512_skylake_f64 +#define xsinf nsimd_sleef_sin_u35_avx512_skylake_f32 +#define xcos nsimd_sleef_cos_u35_avx512_skylake_f64 +#define xcosf nsimd_sleef_cos_u35_avx512_skylake_f32 +#define xsincos nsimd_sleef_sincos_u35_avx512_skylake_f64 +#define xsincosf nsimd_sleef_sincos_u35_avx512_skylake_f32 +#define xtan nsimd_sleef_tan_u35_avx512_skylake_f64 +#define xtanf nsimd_sleef_tan_u35_avx512_skylake_f32 +#define xasin nsimd_sleef_asin_u35_avx512_skylake_f64 +#define xasinf nsimd_sleef_asin_u35_avx512_skylake_f32 +#define xacos nsimd_sleef_acos_u35_avx512_skylake_f64 +#define xacosf nsimd_sleef_acos_u35_avx512_skylake_f32 +#define xatan nsimd_sleef_atan_u35_avx512_skylake_f64 +#define xatanf nsimd_sleef_atan_u35_avx512_skylake_f32 +#define xatan2 nsimd_sleef_atan2_u35_avx512_skylake_f64 +#define xatan2f nsimd_sleef_atan2_u35_avx512_skylake_f32 +#define xlog nsimd_sleef_log_u35_avx512_skylake_f64 +#define xlogf nsimd_sleef_log_u35_avx512_skylake_f32 +#define xcbrt nsimd_sleef_cbrt_u35_avx512_skylake_f64 +#define xcbrtf nsimd_sleef_cbrt_u35_avx512_skylake_f32 +#define xsin_u1 nsimd_sleef_sin_u10_avx512_skylake_f64 +#define xsinf_u1 nsimd_sleef_sin_u10_avx512_skylake_f32 +#define xcos_u1 nsimd_sleef_cos_u10_avx512_skylake_f64 +#define xcosf_u1 nsimd_sleef_cos_u10_avx512_skylake_f32 +#define xsincos_u1 nsimd_sleef_sincos_u10_avx512_skylake_f64 +#define xsincosf_u1 nsimd_sleef_sincos_u10_avx512_skylake_f32 +#define xtan_u1 nsimd_sleef_tan_u10_avx512_skylake_f64 +#define xtanf_u1 nsimd_sleef_tan_u10_avx512_skylake_f32 +#define xasin_u1 nsimd_sleef_asin_u10_avx512_skylake_f64 +#define xasinf_u1 nsimd_sleef_asin_u10_avx512_skylake_f32 +#define xacos_u1 nsimd_sleef_acos_u10_avx512_skylake_f64 +#define xacosf_u1 
nsimd_sleef_acos_u10_avx512_skylake_f32 +#define xatan_u1 nsimd_sleef_atan_u10_avx512_skylake_f64 +#define xatanf_u1 nsimd_sleef_atan_u10_avx512_skylake_f32 +#define xatan2_u1 nsimd_sleef_atan2_u10_avx512_skylake_f64 +#define xatan2f_u1 nsimd_sleef_atan2_u10_avx512_skylake_f32 +#define xlog_u1 nsimd_sleef_log_u10_avx512_skylake_f64 +#define xlogf_u1 nsimd_sleef_log_u10_avx512_skylake_f32 +#define xcbrt_u1 nsimd_sleef_cbrt_u10_avx512_skylake_f64 +#define xcbrtf_u1 nsimd_sleef_cbrt_u10_avx512_skylake_f32 +#define xexp nsimd_sleef_exp_u10_avx512_skylake_f64 +#define xexpf nsimd_sleef_exp_u10_avx512_skylake_f32 +#define xpow nsimd_sleef_pow_u10_avx512_skylake_f64 +#define xpowf nsimd_sleef_pow_u10_avx512_skylake_f32 +#define xsinh nsimd_sleef_sinh_u10_avx512_skylake_f64 +#define xsinhf nsimd_sleef_sinh_u10_avx512_skylake_f32 +#define xcosh nsimd_sleef_cosh_u10_avx512_skylake_f64 +#define xcoshf nsimd_sleef_cosh_u10_avx512_skylake_f32 +#define xtanh nsimd_sleef_tanh_u10_avx512_skylake_f64 +#define xtanhf nsimd_sleef_tanh_u10_avx512_skylake_f32 +#define xsinh_u35 nsimd_sleef_sinh_u35_avx512_skylake_f64 +#define xsinhf_u35 nsimd_sleef_sinh_u35_avx512_skylake_f32 +#define xcosh_u35 nsimd_sleef_cosh_u35_avx512_skylake_f64 +#define xcoshf_u35 nsimd_sleef_cosh_u35_avx512_skylake_f32 +#define xtanh_u35 nsimd_sleef_tanh_u35_avx512_skylake_f64 +#define xtanhf_u35 nsimd_sleef_tanh_u35_avx512_skylake_f32 +#define xfastsin_u3500 nsimd_sleef_fastsin_u3500_avx512_skylake_f64 +#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500_avx512_skylake_f32 +#define xfastcos_u3500 nsimd_sleef_fastcos_u3500_avx512_skylake_f64 +#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500_avx512_skylake_f32 +#define xfastpow_u3500 nsimd_sleef_fastpow_u3500_avx512_skylake_f64 +#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500_avx512_skylake_f32 +#define xasinh nsimd_sleef_asinh_u10_avx512_skylake_f64 +#define xasinhf nsimd_sleef_asinh_u10_avx512_skylake_f32 +#define xacosh nsimd_sleef_acosh_u10_avx512_skylake_f64 +#define xacoshf nsimd_sleef_acosh_u10_avx512_skylake_f32 +#define xatanh nsimd_sleef_atanh_u10_avx512_skylake_f64 +#define xatanhf nsimd_sleef_atanh_u10_avx512_skylake_f32 +#define xexp2 nsimd_sleef_exp2_u10_avx512_skylake_f64 +#define xexp2f nsimd_sleef_exp2_u10_avx512_skylake_f32 +#define xexp2_u35 nsimd_sleef_exp2_u35_avx512_skylake_f64 +#define xexp2f_u35 nsimd_sleef_exp2_u35_avx512_skylake_f32 +#define xexp10 nsimd_sleef_exp10_u10_avx512_skylake_f64 +#define xexp10f nsimd_sleef_exp10_u10_avx512_skylake_f32 +#define xexp10_u35 nsimd_sleef_exp10_u35_avx512_skylake_f64 +#define xexp10f_u35 nsimd_sleef_exp10_u35_avx512_skylake_f32 +#define xexpm1 nsimd_sleef_expm1_u10_avx512_skylake_f64 +#define xexpm1f nsimd_sleef_expm1_u10_avx512_skylake_f32 +#define xlog10 nsimd_sleef_log10_u10_avx512_skylake_f64 +#define xlog10f nsimd_sleef_log10_u10_avx512_skylake_f32 +#define xlog2 nsimd_sleef_log2_u10_avx512_skylake_f64 +#define xlog2f nsimd_sleef_log2_u10_avx512_skylake_f32 +#define xlog2_u35 nsimd_sleef_log2_u35_avx512_skylake_f64 +#define xlog2f_u35 nsimd_sleef_log2_u35_avx512_skylake_f32 +#define xlog1p nsimd_sleef_log1p_u10_avx512_skylake_f64 +#define xlog1pf nsimd_sleef_log1p_u10_avx512_skylake_f32 +#define xsincospi_u05 nsimd_sleef_sincospi_u05_avx512_skylake_f64 +#define xsincospif_u05 nsimd_sleef_sincospi_u05_avx512_skylake_f32 +#define xsincospi_u35 nsimd_sleef_sincospi_u35_avx512_skylake_f64 +#define xsincospif_u35 nsimd_sleef_sincospi_u35_avx512_skylake_f32 +#define xsinpi_u05 nsimd_sleef_sinpi_u05_avx512_skylake_f64 
+#define xsinpif_u05 nsimd_sleef_sinpi_u05_avx512_skylake_f32 +#define xcospi_u05 nsimd_sleef_cospi_u05_avx512_skylake_f64 +#define xcospif_u05 nsimd_sleef_cospi_u05_avx512_skylake_f32 +#define xldexp nsimd_sleef_ldexp_avx512_skylake_f64 +#define xldexpf nsimd_sleef_ldexp_avx512_skylake_f32 +#define xilogb nsimd_sleef_ilogb_avx512_skylake_f64 +#define xilogbf nsimd_sleef_ilogb_avx512_skylake_f32 +#define xfma nsimd_sleef_fma_avx512_skylake_f64 +#define xfmaf nsimd_sleef_fma_avx512_skylake_f32 +#define xsqrt nsimd_sleef_sqrt_avx512_skylake_f64 +#define xsqrtf nsimd_sleef_sqrt_avx512_skylake_f32 +#define xsqrt_u05 nsimd_sleef_sqrt_u05_avx512_skylake_f64 +#define xsqrtf_u05 nsimd_sleef_sqrt_u05_avx512_skylake_f32 +#define xsqrt_u35 nsimd_sleef_sqrt_u35_avx512_skylake_f64 +#define xsqrtf_u35 nsimd_sleef_sqrt_u35_avx512_skylake_f32 +#define xhypot_u05 nsimd_sleef_hypot_u05_avx512_skylake_f64 +#define xhypotf_u05 nsimd_sleef_hypot_u05_avx512_skylake_f32 +#define xhypot_u35 nsimd_sleef_hypot_u35_avx512_skylake_f64 +#define xhypotf_u35 nsimd_sleef_hypot_u35_avx512_skylake_f32 +#define xfabs nsimd_sleef_fabs_avx512_skylake_f64 +#define xfabsf nsimd_sleef_fabs_avx512_skylake_f32 +#define xcopysign nsimd_sleef_copysign_avx512_skylake_f64 +#define xcopysignf nsimd_sleef_copysign_avx512_skylake_f32 +#define xfmax nsimd_sleef_fmax_avx512_skylake_f64 +#define xfmaxf nsimd_sleef_fmax_avx512_skylake_f32 +#define xfmin nsimd_sleef_fmin_avx512_skylake_f64 +#define xfminf nsimd_sleef_fmin_avx512_skylake_f32 +#define xfdim nsimd_sleef_fdim_avx512_skylake_f64 +#define xfdimf nsimd_sleef_fdim_avx512_skylake_f32 +#define xtrunc nsimd_sleef_trunc_avx512_skylake_f64 +#define xtruncf nsimd_sleef_trunc_avx512_skylake_f32 +#define xfloor nsimd_sleef_floor_avx512_skylake_f64 +#define xfloorf nsimd_sleef_floor_avx512_skylake_f32 +#define xceil nsimd_sleef_ceil_avx512_skylake_f64 +#define xceilf nsimd_sleef_ceil_avx512_skylake_f32 +#define xround nsimd_sleef_round_avx512_skylake_f64 +#define xroundf nsimd_sleef_round_avx512_skylake_f32 +#define xrint nsimd_sleef_rint_avx512_skylake_f64 +#define xrintf nsimd_sleef_rint_avx512_skylake_f32 +#define xnextafter nsimd_sleef_nextafter_avx512_skylake_f64 +#define xnextafterf nsimd_sleef_nextafter_avx512_skylake_f32 +#define xfrfrexp nsimd_sleef_frfrexp_avx512_skylake_f64 +#define xfrfrexpf nsimd_sleef_frfrexp_avx512_skylake_f32 +#define xexpfrexp nsimd_sleef_expfrexp_avx512_skylake_f64 +#define xexpfrexpf nsimd_sleef_expfrexp_avx512_skylake_f32 +#define xfmod nsimd_sleef_fmod_avx512_skylake_f64 +#define xfmodf nsimd_sleef_fmod_avx512_skylake_f32 +#define xremainder nsimd_sleef_remainder_avx512_skylake_f64 +#define xremainderf nsimd_sleef_remainder_avx512_skylake_f32 +#define xmodf nsimd_sleef_modf_avx512_skylake_f64 +#define xmodff nsimd_sleef_modf_avx512_skylake_f32 +#define xlgamma_u1 nsimd_sleef_lgamma_u10_avx512_skylake_f64 +#define xlgammaf_u1 nsimd_sleef_lgamma_u10_avx512_skylake_f32 +#define xtgamma_u1 nsimd_sleef_tgamma_u10_avx512_skylake_f64 +#define xtgammaf_u1 nsimd_sleef_tgamma_u10_avx512_skylake_f32 +#define xerf_u1 nsimd_sleef_erf_u10_avx512_skylake_f64 +#define xerff_u1 nsimd_sleef_erf_u10_avx512_skylake_f32 +#define xerfc_u15 nsimd_sleef_erfc_u15_avx512_skylake_f64 +#define xerfcf_u15 nsimd_sleef_erfc_u15_avx512_skylake_f32 +#define xgetInt nsimd_sleef_getInt_avx512_skylake_f64 +#define xgetIntf nsimd_sleef_getInt_avx512_skylake_f32 +#define xgetPtr nsimd_sleef_getPtr_avx512_skylake_f64 +#define xgetPtrf nsimd_sleef_getPtr_avx512_skylake_f32 + + #endif + + 
#define rempi nsimd_sleef_rempi_avx512_skylake + #define rempif nsimd_sleef_rempif_avx512_skylake + #define rempisub nsimd_sleef_rempisub_avx512_skylake + #define rempisubf nsimd_sleef_rempisubf_avx512_skylake + #define gammak nsimd_gammak_avx512_skylake + #define gammafk nsimd_gammafk_avx512_skylake + + #endif + + + +#endif + diff --git a/src/renameneon32.h b/src/renameneon32.h new file mode 100644 index 00000000..23b93a51 --- /dev/null +++ b/src/renameneon32.h @@ -0,0 +1,337 @@ +#ifndef RENAMENEON32_H + #define RENAMENEON32_H + + /* ------------------------------------------------------------------------- */ + /* Naming of functions neon128 */ + + #ifdef NSIMD_NEON128 + + #ifdef DETERMINISTIC + + #define xsin nsimd_sleef_sin_u35d_neon128_f64 +#define xsinf nsimd_sleef_sin_u35d_neon128_f32 +#define xcos nsimd_sleef_cos_u35d_neon128_f64 +#define xcosf nsimd_sleef_cos_u35d_neon128_f32 +#define xsincos nsimd_sleef_sincos_u35d_neon128_f64 +#define xsincosf nsimd_sleef_sincos_u35d_neon128_f32 +#define xtan nsimd_sleef_tan_u35d_neon128_f64 +#define xtanf nsimd_sleef_tan_u35d_neon128_f32 +#define xasin nsimd_sleef_asin_u35d_neon128_f64 +#define xasinf nsimd_sleef_asin_u35d_neon128_f32 +#define xacos nsimd_sleef_acos_u35d_neon128_f64 +#define xacosf nsimd_sleef_acos_u35d_neon128_f32 +#define xatan nsimd_sleef_atan_u35d_neon128_f64 +#define xatanf nsimd_sleef_atan_u35d_neon128_f32 +#define xatan2 nsimd_sleef_atan2_u35d_neon128_f64 +#define xatan2f nsimd_sleef_atan2_u35d_neon128_f32 +#define xlog nsimd_sleef_log_u35d_neon128_f64 +#define xlogf nsimd_sleef_log_u35d_neon128_f32 +#define xcbrt nsimd_sleef_cbrt_u35d_neon128_f64 +#define xcbrtf nsimd_sleef_cbrt_u35d_neon128_f32 +#define xsin_u1 nsimd_sleef_sin_u10d_neon128_f64 +#define xsinf_u1 nsimd_sleef_sin_u10d_neon128_f32 +#define xcos_u1 nsimd_sleef_cos_u10d_neon128_f64 +#define xcosf_u1 nsimd_sleef_cos_u10d_neon128_f32 +#define xsincos_u1 nsimd_sleef_sincos_u10d_neon128_f64 +#define xsincosf_u1 nsimd_sleef_sincos_u10d_neon128_f32 +#define xtan_u1 nsimd_sleef_tan_u10d_neon128_f64 +#define xtanf_u1 nsimd_sleef_tan_u10d_neon128_f32 +#define xasin_u1 nsimd_sleef_asin_u10d_neon128_f64 +#define xasinf_u1 nsimd_sleef_asin_u10d_neon128_f32 +#define xacos_u1 nsimd_sleef_acos_u10d_neon128_f64 +#define xacosf_u1 nsimd_sleef_acos_u10d_neon128_f32 +#define xatan_u1 nsimd_sleef_atan_u10d_neon128_f64 +#define xatanf_u1 nsimd_sleef_atan_u10d_neon128_f32 +#define xatan2_u1 nsimd_sleef_atan2_u10d_neon128_f64 +#define xatan2f_u1 nsimd_sleef_atan2_u10d_neon128_f32 +#define xlog_u1 nsimd_sleef_log_u10d_neon128_f64 +#define xlogf_u1 nsimd_sleef_log_u10d_neon128_f32 +#define xcbrt_u1 nsimd_sleef_cbrt_u10d_neon128_f64 +#define xcbrtf_u1 nsimd_sleef_cbrt_u10d_neon128_f32 +#define xexp nsimd_sleef_exp_u10d_neon128_f64 +#define xexpf nsimd_sleef_exp_u10d_neon128_f32 +#define xpow nsimd_sleef_pow_u10d_neon128_f64 +#define xpowf nsimd_sleef_pow_u10d_neon128_f32 +#define xsinh nsimd_sleef_sinh_u10d_neon128_f64 +#define xsinhf nsimd_sleef_sinh_u10d_neon128_f32 +#define xcosh nsimd_sleef_cosh_u10d_neon128_f64 +#define xcoshf nsimd_sleef_cosh_u10d_neon128_f32 +#define xtanh nsimd_sleef_tanh_u10d_neon128_f64 +#define xtanhf nsimd_sleef_tanh_u10d_neon128_f32 +#define xsinh_u35 nsimd_sleef_sinh_u35d_neon128_f64 +#define xsinhf_u35 nsimd_sleef_sinh_u35d_neon128_f32 +#define xcosh_u35 nsimd_sleef_cosh_u35d_neon128_f64 +#define xcoshf_u35 nsimd_sleef_cosh_u35d_neon128_f32 +#define xtanh_u35 nsimd_sleef_tanh_u35d_neon128_f64 +#define xtanhf_u35 nsimd_sleef_tanh_u35d_neon128_f32 
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500d_neon128_f64 +#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500d_neon128_f32 +#define xfastcos_u3500 nsimd_sleef_fastcos_u3500d_neon128_f64 +#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500d_neon128_f32 +#define xfastpow_u3500 nsimd_sleef_fastpow_u3500d_neon128_f64 +#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500d_neon128_f32 +#define xasinh nsimd_sleef_asinh_u10d_neon128_f64 +#define xasinhf nsimd_sleef_asinh_u10d_neon128_f32 +#define xacosh nsimd_sleef_acosh_u10d_neon128_f64 +#define xacoshf nsimd_sleef_acosh_u10d_neon128_f32 +#define xatanh nsimd_sleef_atanh_u10d_neon128_f64 +#define xatanhf nsimd_sleef_atanh_u10d_neon128_f32 +#define xexp2 nsimd_sleef_exp2_u10d_neon128_f64 +#define xexp2f nsimd_sleef_exp2_u10d_neon128_f32 +#define xexp2_u35 nsimd_sleef_exp2_u35d_neon128_f64 +#define xexp2f_u35 nsimd_sleef_exp2_u35d_neon128_f32 +#define xexp10 nsimd_sleef_exp10_u10d_neon128_f64 +#define xexp10f nsimd_sleef_exp10_u10d_neon128_f32 +#define xexp10_u35 nsimd_sleef_exp10_u35d_neon128_f64 +#define xexp10f_u35 nsimd_sleef_exp10_u35d_neon128_f32 +#define xexpm1 nsimd_sleef_expm1_u10d_neon128_f64 +#define xexpm1f nsimd_sleef_expm1_u10d_neon128_f32 +#define xlog10 nsimd_sleef_log10_u10d_neon128_f64 +#define xlog10f nsimd_sleef_log10_u10d_neon128_f32 +#define xlog2 nsimd_sleef_log2_u10d_neon128_f64 +#define xlog2f nsimd_sleef_log2_u10d_neon128_f32 +#define xlog2_u35 nsimd_sleef_log2_u35d_neon128_f64 +#define xlog2f_u35 nsimd_sleef_log2_u35d_neon128_f32 +#define xlog1p nsimd_sleef_log1p_u10d_neon128_f64 +#define xlog1pf nsimd_sleef_log1p_u10d_neon128_f32 +#define xsincospi_u05 nsimd_sleef_sincospi_u05d_neon128_f64 +#define xsincospif_u05 nsimd_sleef_sincospi_u05d_neon128_f32 +#define xsincospi_u35 nsimd_sleef_sincospi_u35d_neon128_f64 +#define xsincospif_u35 nsimd_sleef_sincospi_u35d_neon128_f32 +#define xsinpi_u05 nsimd_sleef_sinpi_u05d_neon128_f64 +#define xsinpif_u05 nsimd_sleef_sinpi_u05d_neon128_f32 +#define xcospi_u05 nsimd_sleef_cospi_u05d_neon128_f64 +#define xcospif_u05 nsimd_sleef_cospi_u05d_neon128_f32 +#define xldexp nsimd_sleef_ldexp_neon128_f64 +#define xldexpf nsimd_sleef_ldexp_neon128_f32 +#define xilogb nsimd_sleef_ilogb_neon128_f64 +#define xilogbf nsimd_sleef_ilogb_neon128_f32 +#define xfma nsimd_sleef_fma_neon128_f64 +#define xfmaf nsimd_sleef_fma_neon128_f32 +#define xsqrt nsimd_sleef_sqrt_neon128_f64 +#define xsqrtf nsimd_sleef_sqrt_neon128_f32 +#define xsqrt_u05 nsimd_sleef_sqrt_u05d_neon128_f64 +#define xsqrtf_u05 nsimd_sleef_sqrt_u05d_neon128_f32 +#define xsqrt_u35 nsimd_sleef_sqrt_u35d_neon128_f64 +#define xsqrtf_u35 nsimd_sleef_sqrt_u35d_neon128_f32 +#define xhypot_u05 nsimd_sleef_hypot_u05d_neon128_f64 +#define xhypotf_u05 nsimd_sleef_hypot_u05d_neon128_f32 +#define xhypot_u35 nsimd_sleef_hypot_u35d_neon128_f64 +#define xhypotf_u35 nsimd_sleef_hypot_u35d_neon128_f32 +#define xfabs nsimd_sleef_fabs_neon128_f64 +#define xfabsf nsimd_sleef_fabs_neon128_f32 +#define xcopysign nsimd_sleef_copysign_neon128_f64 +#define xcopysignf nsimd_sleef_copysign_neon128_f32 +#define xfmax nsimd_sleef_fmax_neon128_f64 +#define xfmaxf nsimd_sleef_fmax_neon128_f32 +#define xfmin nsimd_sleef_fmin_neon128_f64 +#define xfminf nsimd_sleef_fmin_neon128_f32 +#define xfdim nsimd_sleef_fdim_neon128_f64 +#define xfdimf nsimd_sleef_fdim_neon128_f32 +#define xtrunc nsimd_sleef_trunc_neon128_f64 +#define xtruncf nsimd_sleef_trunc_neon128_f32 +#define xfloor nsimd_sleef_floor_neon128_f64 +#define xfloorf nsimd_sleef_floor_neon128_f32 +#define xceil 
nsimd_sleef_ceil_neon128_f64 +#define xceilf nsimd_sleef_ceil_neon128_f32 +#define xround nsimd_sleef_round_neon128_f64 +#define xroundf nsimd_sleef_round_neon128_f32 +#define xrint nsimd_sleef_rint_neon128_f64 +#define xrintf nsimd_sleef_rint_neon128_f32 +#define xnextafter nsimd_sleef_nextafter_neon128_f64 +#define xnextafterf nsimd_sleef_nextafter_neon128_f32 +#define xfrfrexp nsimd_sleef_frfrexp_neon128_f64 +#define xfrfrexpf nsimd_sleef_frfrexp_neon128_f32 +#define xexpfrexp nsimd_sleef_expfrexp_neon128_f64 +#define xexpfrexpf nsimd_sleef_expfrexp_neon128_f32 +#define xfmod nsimd_sleef_fmod_neon128_f64 +#define xfmodf nsimd_sleef_fmod_neon128_f32 +#define xremainder nsimd_sleef_remainder_neon128_f64 +#define xremainderf nsimd_sleef_remainder_neon128_f32 +#define xmodf nsimd_sleef_modf_neon128_f64 +#define xmodff nsimd_sleef_modf_neon128_f32 +#define xlgamma_u1 nsimd_sleef_lgamma_u10d_neon128_f64 +#define xlgammaf_u1 nsimd_sleef_lgamma_u10d_neon128_f32 +#define xtgamma_u1 nsimd_sleef_tgamma_u10d_neon128_f64 +#define xtgammaf_u1 nsimd_sleef_tgamma_u10d_neon128_f32 +#define xerf_u1 nsimd_sleef_erf_u10d_neon128_f64 +#define xerff_u1 nsimd_sleef_erf_u10d_neon128_f32 +#define xerfc_u15 nsimd_sleef_erfc_u15d_neon128_f64 +#define xerfcf_u15 nsimd_sleef_erfc_u15d_neon128_f32 +#define xgetInt nsimd_sleef_getInt_neon128_f64 +#define xgetIntf nsimd_sleef_getInt_neon128_f32 +#define xgetPtr nsimd_sleef_getPtr_neon128_f64 +#define xgetPtrf nsimd_sleef_getPtr_neon128_f32 + + #else + + #define xsin nsimd_sleef_sin_u35_neon128_f64 +#define xsinf nsimd_sleef_sin_u35_neon128_f32 +#define xcos nsimd_sleef_cos_u35_neon128_f64 +#define xcosf nsimd_sleef_cos_u35_neon128_f32 +#define xsincos nsimd_sleef_sincos_u35_neon128_f64 +#define xsincosf nsimd_sleef_sincos_u35_neon128_f32 +#define xtan nsimd_sleef_tan_u35_neon128_f64 +#define xtanf nsimd_sleef_tan_u35_neon128_f32 +#define xasin nsimd_sleef_asin_u35_neon128_f64 +#define xasinf nsimd_sleef_asin_u35_neon128_f32 +#define xacos nsimd_sleef_acos_u35_neon128_f64 +#define xacosf nsimd_sleef_acos_u35_neon128_f32 +#define xatan nsimd_sleef_atan_u35_neon128_f64 +#define xatanf nsimd_sleef_atan_u35_neon128_f32 +#define xatan2 nsimd_sleef_atan2_u35_neon128_f64 +#define xatan2f nsimd_sleef_atan2_u35_neon128_f32 +#define xlog nsimd_sleef_log_u35_neon128_f64 +#define xlogf nsimd_sleef_log_u35_neon128_f32 +#define xcbrt nsimd_sleef_cbrt_u35_neon128_f64 +#define xcbrtf nsimd_sleef_cbrt_u35_neon128_f32 +#define xsin_u1 nsimd_sleef_sin_u10_neon128_f64 +#define xsinf_u1 nsimd_sleef_sin_u10_neon128_f32 +#define xcos_u1 nsimd_sleef_cos_u10_neon128_f64 +#define xcosf_u1 nsimd_sleef_cos_u10_neon128_f32 +#define xsincos_u1 nsimd_sleef_sincos_u10_neon128_f64 +#define xsincosf_u1 nsimd_sleef_sincos_u10_neon128_f32 +#define xtan_u1 nsimd_sleef_tan_u10_neon128_f64 +#define xtanf_u1 nsimd_sleef_tan_u10_neon128_f32 +#define xasin_u1 nsimd_sleef_asin_u10_neon128_f64 +#define xasinf_u1 nsimd_sleef_asin_u10_neon128_f32 +#define xacos_u1 nsimd_sleef_acos_u10_neon128_f64 +#define xacosf_u1 nsimd_sleef_acos_u10_neon128_f32 +#define xatan_u1 nsimd_sleef_atan_u10_neon128_f64 +#define xatanf_u1 nsimd_sleef_atan_u10_neon128_f32 +#define xatan2_u1 nsimd_sleef_atan2_u10_neon128_f64 +#define xatan2f_u1 nsimd_sleef_atan2_u10_neon128_f32 +#define xlog_u1 nsimd_sleef_log_u10_neon128_f64 +#define xlogf_u1 nsimd_sleef_log_u10_neon128_f32 +#define xcbrt_u1 nsimd_sleef_cbrt_u10_neon128_f64 +#define xcbrtf_u1 nsimd_sleef_cbrt_u10_neon128_f32 +#define xexp nsimd_sleef_exp_u10_neon128_f64 +#define xexpf 
nsimd_sleef_exp_u10_neon128_f32 +#define xpow nsimd_sleef_pow_u10_neon128_f64 +#define xpowf nsimd_sleef_pow_u10_neon128_f32 +#define xsinh nsimd_sleef_sinh_u10_neon128_f64 +#define xsinhf nsimd_sleef_sinh_u10_neon128_f32 +#define xcosh nsimd_sleef_cosh_u10_neon128_f64 +#define xcoshf nsimd_sleef_cosh_u10_neon128_f32 +#define xtanh nsimd_sleef_tanh_u10_neon128_f64 +#define xtanhf nsimd_sleef_tanh_u10_neon128_f32 +#define xsinh_u35 nsimd_sleef_sinh_u35_neon128_f64 +#define xsinhf_u35 nsimd_sleef_sinh_u35_neon128_f32 +#define xcosh_u35 nsimd_sleef_cosh_u35_neon128_f64 +#define xcoshf_u35 nsimd_sleef_cosh_u35_neon128_f32 +#define xtanh_u35 nsimd_sleef_tanh_u35_neon128_f64 +#define xtanhf_u35 nsimd_sleef_tanh_u35_neon128_f32 +#define xfastsin_u3500 nsimd_sleef_fastsin_u3500_neon128_f64 +#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500_neon128_f32 +#define xfastcos_u3500 nsimd_sleef_fastcos_u3500_neon128_f64 +#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500_neon128_f32 +#define xfastpow_u3500 nsimd_sleef_fastpow_u3500_neon128_f64 +#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500_neon128_f32 +#define xasinh nsimd_sleef_asinh_u10_neon128_f64 +#define xasinhf nsimd_sleef_asinh_u10_neon128_f32 +#define xacosh nsimd_sleef_acosh_u10_neon128_f64 +#define xacoshf nsimd_sleef_acosh_u10_neon128_f32 +#define xatanh nsimd_sleef_atanh_u10_neon128_f64 +#define xatanhf nsimd_sleef_atanh_u10_neon128_f32 +#define xexp2 nsimd_sleef_exp2_u10_neon128_f64 +#define xexp2f nsimd_sleef_exp2_u10_neon128_f32 +#define xexp2_u35 nsimd_sleef_exp2_u35_neon128_f64 +#define xexp2f_u35 nsimd_sleef_exp2_u35_neon128_f32 +#define xexp10 nsimd_sleef_exp10_u10_neon128_f64 +#define xexp10f nsimd_sleef_exp10_u10_neon128_f32 +#define xexp10_u35 nsimd_sleef_exp10_u35_neon128_f64 +#define xexp10f_u35 nsimd_sleef_exp10_u35_neon128_f32 +#define xexpm1 nsimd_sleef_expm1_u10_neon128_f64 +#define xexpm1f nsimd_sleef_expm1_u10_neon128_f32 +#define xlog10 nsimd_sleef_log10_u10_neon128_f64 +#define xlog10f nsimd_sleef_log10_u10_neon128_f32 +#define xlog2 nsimd_sleef_log2_u10_neon128_f64 +#define xlog2f nsimd_sleef_log2_u10_neon128_f32 +#define xlog2_u35 nsimd_sleef_log2_u35_neon128_f64 +#define xlog2f_u35 nsimd_sleef_log2_u35_neon128_f32 +#define xlog1p nsimd_sleef_log1p_u10_neon128_f64 +#define xlog1pf nsimd_sleef_log1p_u10_neon128_f32 +#define xsincospi_u05 nsimd_sleef_sincospi_u05_neon128_f64 +#define xsincospif_u05 nsimd_sleef_sincospi_u05_neon128_f32 +#define xsincospi_u35 nsimd_sleef_sincospi_u35_neon128_f64 +#define xsincospif_u35 nsimd_sleef_sincospi_u35_neon128_f32 +#define xsinpi_u05 nsimd_sleef_sinpi_u05_neon128_f64 +#define xsinpif_u05 nsimd_sleef_sinpi_u05_neon128_f32 +#define xcospi_u05 nsimd_sleef_cospi_u05_neon128_f64 +#define xcospif_u05 nsimd_sleef_cospi_u05_neon128_f32 +#define xldexp nsimd_sleef_ldexp_neon128_f64 +#define xldexpf nsimd_sleef_ldexp_neon128_f32 +#define xilogb nsimd_sleef_ilogb_neon128_f64 +#define xilogbf nsimd_sleef_ilogb_neon128_f32 +#define xfma nsimd_sleef_fma_neon128_f64 +#define xfmaf nsimd_sleef_fma_neon128_f32 +#define xsqrt nsimd_sleef_sqrt_neon128_f64 +#define xsqrtf nsimd_sleef_sqrt_neon128_f32 +#define xsqrt_u05 nsimd_sleef_sqrt_u05_neon128_f64 +#define xsqrtf_u05 nsimd_sleef_sqrt_u05_neon128_f32 +#define xsqrt_u35 nsimd_sleef_sqrt_u35_neon128_f64 +#define xsqrtf_u35 nsimd_sleef_sqrt_u35_neon128_f32 +#define xhypot_u05 nsimd_sleef_hypot_u05_neon128_f64 +#define xhypotf_u05 nsimd_sleef_hypot_u05_neon128_f32 +#define xhypot_u35 nsimd_sleef_hypot_u35_neon128_f64 +#define xhypotf_u35 
nsimd_sleef_hypot_u35_neon128_f32 +#define xfabs nsimd_sleef_fabs_neon128_f64 +#define xfabsf nsimd_sleef_fabs_neon128_f32 +#define xcopysign nsimd_sleef_copysign_neon128_f64 +#define xcopysignf nsimd_sleef_copysign_neon128_f32 +#define xfmax nsimd_sleef_fmax_neon128_f64 +#define xfmaxf nsimd_sleef_fmax_neon128_f32 +#define xfmin nsimd_sleef_fmin_neon128_f64 +#define xfminf nsimd_sleef_fmin_neon128_f32 +#define xfdim nsimd_sleef_fdim_neon128_f64 +#define xfdimf nsimd_sleef_fdim_neon128_f32 +#define xtrunc nsimd_sleef_trunc_neon128_f64 +#define xtruncf nsimd_sleef_trunc_neon128_f32 +#define xfloor nsimd_sleef_floor_neon128_f64 +#define xfloorf nsimd_sleef_floor_neon128_f32 +#define xceil nsimd_sleef_ceil_neon128_f64 +#define xceilf nsimd_sleef_ceil_neon128_f32 +#define xround nsimd_sleef_round_neon128_f64 +#define xroundf nsimd_sleef_round_neon128_f32 +#define xrint nsimd_sleef_rint_neon128_f64 +#define xrintf nsimd_sleef_rint_neon128_f32 +#define xnextafter nsimd_sleef_nextafter_neon128_f64 +#define xnextafterf nsimd_sleef_nextafter_neon128_f32 +#define xfrfrexp nsimd_sleef_frfrexp_neon128_f64 +#define xfrfrexpf nsimd_sleef_frfrexp_neon128_f32 +#define xexpfrexp nsimd_sleef_expfrexp_neon128_f64 +#define xexpfrexpf nsimd_sleef_expfrexp_neon128_f32 +#define xfmod nsimd_sleef_fmod_neon128_f64 +#define xfmodf nsimd_sleef_fmod_neon128_f32 +#define xremainder nsimd_sleef_remainder_neon128_f64 +#define xremainderf nsimd_sleef_remainder_neon128_f32 +#define xmodf nsimd_sleef_modf_neon128_f64 +#define xmodff nsimd_sleef_modf_neon128_f32 +#define xlgamma_u1 nsimd_sleef_lgamma_u10_neon128_f64 +#define xlgammaf_u1 nsimd_sleef_lgamma_u10_neon128_f32 +#define xtgamma_u1 nsimd_sleef_tgamma_u10_neon128_f64 +#define xtgammaf_u1 nsimd_sleef_tgamma_u10_neon128_f32 +#define xerf_u1 nsimd_sleef_erf_u10_neon128_f64 +#define xerff_u1 nsimd_sleef_erf_u10_neon128_f32 +#define xerfc_u15 nsimd_sleef_erfc_u15_neon128_f64 +#define xerfcf_u15 nsimd_sleef_erfc_u15_neon128_f32 +#define xgetInt nsimd_sleef_getInt_neon128_f64 +#define xgetIntf nsimd_sleef_getInt_neon128_f32 +#define xgetPtr nsimd_sleef_getPtr_neon128_f64 +#define xgetPtrf nsimd_sleef_getPtr_neon128_f32 + + #endif + + #define rempi nsimd_sleef_rempi_neon128 + #define rempif nsimd_sleef_rempif_neon128 + #define rempisub nsimd_sleef_rempisub_neon128 + #define rempisubf nsimd_sleef_rempisubf_neon128 + #define gammak nsimd_gammak_neon128 + #define gammafk nsimd_gammafk_neon128 + + #endif + + + +#endif + diff --git a/src/renamesse2.h b/src/renamesse2.h new file mode 100644 index 00000000..76a95d19 --- /dev/null +++ b/src/renamesse2.h @@ -0,0 +1,337 @@ +#ifndef RENAMESSE2_H + #define RENAMESSE2_H + + /* ------------------------------------------------------------------------- */ + /* Naming of functions sse2 */ + + #ifdef NSIMD_SSE2 + + #ifdef DETERMINISTIC + + #define xsin nsimd_sleef_sin_u35d_sse2_f64 +#define xsinf nsimd_sleef_sin_u35d_sse2_f32 +#define xcos nsimd_sleef_cos_u35d_sse2_f64 +#define xcosf nsimd_sleef_cos_u35d_sse2_f32 +#define xsincos nsimd_sleef_sincos_u35d_sse2_f64 +#define xsincosf nsimd_sleef_sincos_u35d_sse2_f32 +#define xtan nsimd_sleef_tan_u35d_sse2_f64 +#define xtanf nsimd_sleef_tan_u35d_sse2_f32 +#define xasin nsimd_sleef_asin_u35d_sse2_f64 +#define xasinf nsimd_sleef_asin_u35d_sse2_f32 +#define xacos nsimd_sleef_acos_u35d_sse2_f64 +#define xacosf nsimd_sleef_acos_u35d_sse2_f32 +#define xatan nsimd_sleef_atan_u35d_sse2_f64 +#define xatanf nsimd_sleef_atan_u35d_sse2_f32 +#define xatan2 nsimd_sleef_atan2_u35d_sse2_f64 +#define xatan2f 
nsimd_sleef_atan2_u35d_sse2_f32 +#define xlog nsimd_sleef_log_u35d_sse2_f64 +#define xlogf nsimd_sleef_log_u35d_sse2_f32 +#define xcbrt nsimd_sleef_cbrt_u35d_sse2_f64 +#define xcbrtf nsimd_sleef_cbrt_u35d_sse2_f32 +#define xsin_u1 nsimd_sleef_sin_u10d_sse2_f64 +#define xsinf_u1 nsimd_sleef_sin_u10d_sse2_f32 +#define xcos_u1 nsimd_sleef_cos_u10d_sse2_f64 +#define xcosf_u1 nsimd_sleef_cos_u10d_sse2_f32 +#define xsincos_u1 nsimd_sleef_sincos_u10d_sse2_f64 +#define xsincosf_u1 nsimd_sleef_sincos_u10d_sse2_f32 +#define xtan_u1 nsimd_sleef_tan_u10d_sse2_f64 +#define xtanf_u1 nsimd_sleef_tan_u10d_sse2_f32 +#define xasin_u1 nsimd_sleef_asin_u10d_sse2_f64 +#define xasinf_u1 nsimd_sleef_asin_u10d_sse2_f32 +#define xacos_u1 nsimd_sleef_acos_u10d_sse2_f64 +#define xacosf_u1 nsimd_sleef_acos_u10d_sse2_f32 +#define xatan_u1 nsimd_sleef_atan_u10d_sse2_f64 +#define xatanf_u1 nsimd_sleef_atan_u10d_sse2_f32 +#define xatan2_u1 nsimd_sleef_atan2_u10d_sse2_f64 +#define xatan2f_u1 nsimd_sleef_atan2_u10d_sse2_f32 +#define xlog_u1 nsimd_sleef_log_u10d_sse2_f64 +#define xlogf_u1 nsimd_sleef_log_u10d_sse2_f32 +#define xcbrt_u1 nsimd_sleef_cbrt_u10d_sse2_f64 +#define xcbrtf_u1 nsimd_sleef_cbrt_u10d_sse2_f32 +#define xexp nsimd_sleef_exp_u10d_sse2_f64 +#define xexpf nsimd_sleef_exp_u10d_sse2_f32 +#define xpow nsimd_sleef_pow_u10d_sse2_f64 +#define xpowf nsimd_sleef_pow_u10d_sse2_f32 +#define xsinh nsimd_sleef_sinh_u10d_sse2_f64 +#define xsinhf nsimd_sleef_sinh_u10d_sse2_f32 +#define xcosh nsimd_sleef_cosh_u10d_sse2_f64 +#define xcoshf nsimd_sleef_cosh_u10d_sse2_f32 +#define xtanh nsimd_sleef_tanh_u10d_sse2_f64 +#define xtanhf nsimd_sleef_tanh_u10d_sse2_f32 +#define xsinh_u35 nsimd_sleef_sinh_u35d_sse2_f64 +#define xsinhf_u35 nsimd_sleef_sinh_u35d_sse2_f32 +#define xcosh_u35 nsimd_sleef_cosh_u35d_sse2_f64 +#define xcoshf_u35 nsimd_sleef_cosh_u35d_sse2_f32 +#define xtanh_u35 nsimd_sleef_tanh_u35d_sse2_f64 +#define xtanhf_u35 nsimd_sleef_tanh_u35d_sse2_f32 +#define xfastsin_u3500 nsimd_sleef_fastsin_u3500d_sse2_f64 +#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500d_sse2_f32 +#define xfastcos_u3500 nsimd_sleef_fastcos_u3500d_sse2_f64 +#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500d_sse2_f32 +#define xfastpow_u3500 nsimd_sleef_fastpow_u3500d_sse2_f64 +#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500d_sse2_f32 +#define xasinh nsimd_sleef_asinh_u10d_sse2_f64 +#define xasinhf nsimd_sleef_asinh_u10d_sse2_f32 +#define xacosh nsimd_sleef_acosh_u10d_sse2_f64 +#define xacoshf nsimd_sleef_acosh_u10d_sse2_f32 +#define xatanh nsimd_sleef_atanh_u10d_sse2_f64 +#define xatanhf nsimd_sleef_atanh_u10d_sse2_f32 +#define xexp2 nsimd_sleef_exp2_u10d_sse2_f64 +#define xexp2f nsimd_sleef_exp2_u10d_sse2_f32 +#define xexp2_u35 nsimd_sleef_exp2_u35d_sse2_f64 +#define xexp2f_u35 nsimd_sleef_exp2_u35d_sse2_f32 +#define xexp10 nsimd_sleef_exp10_u10d_sse2_f64 +#define xexp10f nsimd_sleef_exp10_u10d_sse2_f32 +#define xexp10_u35 nsimd_sleef_exp10_u35d_sse2_f64 +#define xexp10f_u35 nsimd_sleef_exp10_u35d_sse2_f32 +#define xexpm1 nsimd_sleef_expm1_u10d_sse2_f64 +#define xexpm1f nsimd_sleef_expm1_u10d_sse2_f32 +#define xlog10 nsimd_sleef_log10_u10d_sse2_f64 +#define xlog10f nsimd_sleef_log10_u10d_sse2_f32 +#define xlog2 nsimd_sleef_log2_u10d_sse2_f64 +#define xlog2f nsimd_sleef_log2_u10d_sse2_f32 +#define xlog2_u35 nsimd_sleef_log2_u35d_sse2_f64 +#define xlog2f_u35 nsimd_sleef_log2_u35d_sse2_f32 +#define xlog1p nsimd_sleef_log1p_u10d_sse2_f64 +#define xlog1pf nsimd_sleef_log1p_u10d_sse2_f32 +#define xsincospi_u05 nsimd_sleef_sincospi_u05d_sse2_f64 
+#define xsincospif_u05 nsimd_sleef_sincospi_u05d_sse2_f32 +#define xsincospi_u35 nsimd_sleef_sincospi_u35d_sse2_f64 +#define xsincospif_u35 nsimd_sleef_sincospi_u35d_sse2_f32 +#define xsinpi_u05 nsimd_sleef_sinpi_u05d_sse2_f64 +#define xsinpif_u05 nsimd_sleef_sinpi_u05d_sse2_f32 +#define xcospi_u05 nsimd_sleef_cospi_u05d_sse2_f64 +#define xcospif_u05 nsimd_sleef_cospi_u05d_sse2_f32 +#define xldexp nsimd_sleef_ldexp_sse2_f64 +#define xldexpf nsimd_sleef_ldexp_sse2_f32 +#define xilogb nsimd_sleef_ilogb_sse2_f64 +#define xilogbf nsimd_sleef_ilogb_sse2_f32 +#define xfma nsimd_sleef_fma_sse2_f64 +#define xfmaf nsimd_sleef_fma_sse2_f32 +#define xsqrt nsimd_sleef_sqrt_sse2_f64 +#define xsqrtf nsimd_sleef_sqrt_sse2_f32 +#define xsqrt_u05 nsimd_sleef_sqrt_u05d_sse2_f64 +#define xsqrtf_u05 nsimd_sleef_sqrt_u05d_sse2_f32 +#define xsqrt_u35 nsimd_sleef_sqrt_u35d_sse2_f64 +#define xsqrtf_u35 nsimd_sleef_sqrt_u35d_sse2_f32 +#define xhypot_u05 nsimd_sleef_hypot_u05d_sse2_f64 +#define xhypotf_u05 nsimd_sleef_hypot_u05d_sse2_f32 +#define xhypot_u35 nsimd_sleef_hypot_u35d_sse2_f64 +#define xhypotf_u35 nsimd_sleef_hypot_u35d_sse2_f32 +#define xfabs nsimd_sleef_fabs_sse2_f64 +#define xfabsf nsimd_sleef_fabs_sse2_f32 +#define xcopysign nsimd_sleef_copysign_sse2_f64 +#define xcopysignf nsimd_sleef_copysign_sse2_f32 +#define xfmax nsimd_sleef_fmax_sse2_f64 +#define xfmaxf nsimd_sleef_fmax_sse2_f32 +#define xfmin nsimd_sleef_fmin_sse2_f64 +#define xfminf nsimd_sleef_fmin_sse2_f32 +#define xfdim nsimd_sleef_fdim_sse2_f64 +#define xfdimf nsimd_sleef_fdim_sse2_f32 +#define xtrunc nsimd_sleef_trunc_sse2_f64 +#define xtruncf nsimd_sleef_trunc_sse2_f32 +#define xfloor nsimd_sleef_floor_sse2_f64 +#define xfloorf nsimd_sleef_floor_sse2_f32 +#define xceil nsimd_sleef_ceil_sse2_f64 +#define xceilf nsimd_sleef_ceil_sse2_f32 +#define xround nsimd_sleef_round_sse2_f64 +#define xroundf nsimd_sleef_round_sse2_f32 +#define xrint nsimd_sleef_rint_sse2_f64 +#define xrintf nsimd_sleef_rint_sse2_f32 +#define xnextafter nsimd_sleef_nextafter_sse2_f64 +#define xnextafterf nsimd_sleef_nextafter_sse2_f32 +#define xfrfrexp nsimd_sleef_frfrexp_sse2_f64 +#define xfrfrexpf nsimd_sleef_frfrexp_sse2_f32 +#define xexpfrexp nsimd_sleef_expfrexp_sse2_f64 +#define xexpfrexpf nsimd_sleef_expfrexp_sse2_f32 +#define xfmod nsimd_sleef_fmod_sse2_f64 +#define xfmodf nsimd_sleef_fmod_sse2_f32 +#define xremainder nsimd_sleef_remainder_sse2_f64 +#define xremainderf nsimd_sleef_remainder_sse2_f32 +#define xmodf nsimd_sleef_modf_sse2_f64 +#define xmodff nsimd_sleef_modf_sse2_f32 +#define xlgamma_u1 nsimd_sleef_lgamma_u10d_sse2_f64 +#define xlgammaf_u1 nsimd_sleef_lgamma_u10d_sse2_f32 +#define xtgamma_u1 nsimd_sleef_tgamma_u10d_sse2_f64 +#define xtgammaf_u1 nsimd_sleef_tgamma_u10d_sse2_f32 +#define xerf_u1 nsimd_sleef_erf_u10d_sse2_f64 +#define xerff_u1 nsimd_sleef_erf_u10d_sse2_f32 +#define xerfc_u15 nsimd_sleef_erfc_u15d_sse2_f64 +#define xerfcf_u15 nsimd_sleef_erfc_u15d_sse2_f32 +#define xgetInt nsimd_sleef_getInt_sse2_f64 +#define xgetIntf nsimd_sleef_getInt_sse2_f32 +#define xgetPtr nsimd_sleef_getPtr_sse2_f64 +#define xgetPtrf nsimd_sleef_getPtr_sse2_f32 + + #else + + #define xsin nsimd_sleef_sin_u35_sse2_f64 +#define xsinf nsimd_sleef_sin_u35_sse2_f32 +#define xcos nsimd_sleef_cos_u35_sse2_f64 +#define xcosf nsimd_sleef_cos_u35_sse2_f32 +#define xsincos nsimd_sleef_sincos_u35_sse2_f64 +#define xsincosf nsimd_sleef_sincos_u35_sse2_f32 +#define xtan nsimd_sleef_tan_u35_sse2_f64 +#define xtanf nsimd_sleef_tan_u35_sse2_f32 +#define xasin 
nsimd_sleef_asin_u35_sse2_f64 +#define xasinf nsimd_sleef_asin_u35_sse2_f32 +#define xacos nsimd_sleef_acos_u35_sse2_f64 +#define xacosf nsimd_sleef_acos_u35_sse2_f32 +#define xatan nsimd_sleef_atan_u35_sse2_f64 +#define xatanf nsimd_sleef_atan_u35_sse2_f32 +#define xatan2 nsimd_sleef_atan2_u35_sse2_f64 +#define xatan2f nsimd_sleef_atan2_u35_sse2_f32 +#define xlog nsimd_sleef_log_u35_sse2_f64 +#define xlogf nsimd_sleef_log_u35_sse2_f32 +#define xcbrt nsimd_sleef_cbrt_u35_sse2_f64 +#define xcbrtf nsimd_sleef_cbrt_u35_sse2_f32 +#define xsin_u1 nsimd_sleef_sin_u10_sse2_f64 +#define xsinf_u1 nsimd_sleef_sin_u10_sse2_f32 +#define xcos_u1 nsimd_sleef_cos_u10_sse2_f64 +#define xcosf_u1 nsimd_sleef_cos_u10_sse2_f32 +#define xsincos_u1 nsimd_sleef_sincos_u10_sse2_f64 +#define xsincosf_u1 nsimd_sleef_sincos_u10_sse2_f32 +#define xtan_u1 nsimd_sleef_tan_u10_sse2_f64 +#define xtanf_u1 nsimd_sleef_tan_u10_sse2_f32 +#define xasin_u1 nsimd_sleef_asin_u10_sse2_f64 +#define xasinf_u1 nsimd_sleef_asin_u10_sse2_f32 +#define xacos_u1 nsimd_sleef_acos_u10_sse2_f64 +#define xacosf_u1 nsimd_sleef_acos_u10_sse2_f32 +#define xatan_u1 nsimd_sleef_atan_u10_sse2_f64 +#define xatanf_u1 nsimd_sleef_atan_u10_sse2_f32 +#define xatan2_u1 nsimd_sleef_atan2_u10_sse2_f64 +#define xatan2f_u1 nsimd_sleef_atan2_u10_sse2_f32 +#define xlog_u1 nsimd_sleef_log_u10_sse2_f64 +#define xlogf_u1 nsimd_sleef_log_u10_sse2_f32 +#define xcbrt_u1 nsimd_sleef_cbrt_u10_sse2_f64 +#define xcbrtf_u1 nsimd_sleef_cbrt_u10_sse2_f32 +#define xexp nsimd_sleef_exp_u10_sse2_f64 +#define xexpf nsimd_sleef_exp_u10_sse2_f32 +#define xpow nsimd_sleef_pow_u10_sse2_f64 +#define xpowf nsimd_sleef_pow_u10_sse2_f32 +#define xsinh nsimd_sleef_sinh_u10_sse2_f64 +#define xsinhf nsimd_sleef_sinh_u10_sse2_f32 +#define xcosh nsimd_sleef_cosh_u10_sse2_f64 +#define xcoshf nsimd_sleef_cosh_u10_sse2_f32 +#define xtanh nsimd_sleef_tanh_u10_sse2_f64 +#define xtanhf nsimd_sleef_tanh_u10_sse2_f32 +#define xsinh_u35 nsimd_sleef_sinh_u35_sse2_f64 +#define xsinhf_u35 nsimd_sleef_sinh_u35_sse2_f32 +#define xcosh_u35 nsimd_sleef_cosh_u35_sse2_f64 +#define xcoshf_u35 nsimd_sleef_cosh_u35_sse2_f32 +#define xtanh_u35 nsimd_sleef_tanh_u35_sse2_f64 +#define xtanhf_u35 nsimd_sleef_tanh_u35_sse2_f32 +#define xfastsin_u3500 nsimd_sleef_fastsin_u3500_sse2_f64 +#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500_sse2_f32 +#define xfastcos_u3500 nsimd_sleef_fastcos_u3500_sse2_f64 +#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500_sse2_f32 +#define xfastpow_u3500 nsimd_sleef_fastpow_u3500_sse2_f64 +#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500_sse2_f32 +#define xasinh nsimd_sleef_asinh_u10_sse2_f64 +#define xasinhf nsimd_sleef_asinh_u10_sse2_f32 +#define xacosh nsimd_sleef_acosh_u10_sse2_f64 +#define xacoshf nsimd_sleef_acosh_u10_sse2_f32 +#define xatanh nsimd_sleef_atanh_u10_sse2_f64 +#define xatanhf nsimd_sleef_atanh_u10_sse2_f32 +#define xexp2 nsimd_sleef_exp2_u10_sse2_f64 +#define xexp2f nsimd_sleef_exp2_u10_sse2_f32 +#define xexp2_u35 nsimd_sleef_exp2_u35_sse2_f64 +#define xexp2f_u35 nsimd_sleef_exp2_u35_sse2_f32 +#define xexp10 nsimd_sleef_exp10_u10_sse2_f64 +#define xexp10f nsimd_sleef_exp10_u10_sse2_f32 +#define xexp10_u35 nsimd_sleef_exp10_u35_sse2_f64 +#define xexp10f_u35 nsimd_sleef_exp10_u35_sse2_f32 +#define xexpm1 nsimd_sleef_expm1_u10_sse2_f64 +#define xexpm1f nsimd_sleef_expm1_u10_sse2_f32 +#define xlog10 nsimd_sleef_log10_u10_sse2_f64 +#define xlog10f nsimd_sleef_log10_u10_sse2_f32 +#define xlog2 nsimd_sleef_log2_u10_sse2_f64 +#define xlog2f nsimd_sleef_log2_u10_sse2_f32 
+#define xlog2_u35 nsimd_sleef_log2_u35_sse2_f64 +#define xlog2f_u35 nsimd_sleef_log2_u35_sse2_f32 +#define xlog1p nsimd_sleef_log1p_u10_sse2_f64 +#define xlog1pf nsimd_sleef_log1p_u10_sse2_f32 +#define xsincospi_u05 nsimd_sleef_sincospi_u05_sse2_f64 +#define xsincospif_u05 nsimd_sleef_sincospi_u05_sse2_f32 +#define xsincospi_u35 nsimd_sleef_sincospi_u35_sse2_f64 +#define xsincospif_u35 nsimd_sleef_sincospi_u35_sse2_f32 +#define xsinpi_u05 nsimd_sleef_sinpi_u05_sse2_f64 +#define xsinpif_u05 nsimd_sleef_sinpi_u05_sse2_f32 +#define xcospi_u05 nsimd_sleef_cospi_u05_sse2_f64 +#define xcospif_u05 nsimd_sleef_cospi_u05_sse2_f32 +#define xldexp nsimd_sleef_ldexp_sse2_f64 +#define xldexpf nsimd_sleef_ldexp_sse2_f32 +#define xilogb nsimd_sleef_ilogb_sse2_f64 +#define xilogbf nsimd_sleef_ilogb_sse2_f32 +#define xfma nsimd_sleef_fma_sse2_f64 +#define xfmaf nsimd_sleef_fma_sse2_f32 +#define xsqrt nsimd_sleef_sqrt_sse2_f64 +#define xsqrtf nsimd_sleef_sqrt_sse2_f32 +#define xsqrt_u05 nsimd_sleef_sqrt_u05_sse2_f64 +#define xsqrtf_u05 nsimd_sleef_sqrt_u05_sse2_f32 +#define xsqrt_u35 nsimd_sleef_sqrt_u35_sse2_f64 +#define xsqrtf_u35 nsimd_sleef_sqrt_u35_sse2_f32 +#define xhypot_u05 nsimd_sleef_hypot_u05_sse2_f64 +#define xhypotf_u05 nsimd_sleef_hypot_u05_sse2_f32 +#define xhypot_u35 nsimd_sleef_hypot_u35_sse2_f64 +#define xhypotf_u35 nsimd_sleef_hypot_u35_sse2_f32 +#define xfabs nsimd_sleef_fabs_sse2_f64 +#define xfabsf nsimd_sleef_fabs_sse2_f32 +#define xcopysign nsimd_sleef_copysign_sse2_f64 +#define xcopysignf nsimd_sleef_copysign_sse2_f32 +#define xfmax nsimd_sleef_fmax_sse2_f64 +#define xfmaxf nsimd_sleef_fmax_sse2_f32 +#define xfmin nsimd_sleef_fmin_sse2_f64 +#define xfminf nsimd_sleef_fmin_sse2_f32 +#define xfdim nsimd_sleef_fdim_sse2_f64 +#define xfdimf nsimd_sleef_fdim_sse2_f32 +#define xtrunc nsimd_sleef_trunc_sse2_f64 +#define xtruncf nsimd_sleef_trunc_sse2_f32 +#define xfloor nsimd_sleef_floor_sse2_f64 +#define xfloorf nsimd_sleef_floor_sse2_f32 +#define xceil nsimd_sleef_ceil_sse2_f64 +#define xceilf nsimd_sleef_ceil_sse2_f32 +#define xround nsimd_sleef_round_sse2_f64 +#define xroundf nsimd_sleef_round_sse2_f32 +#define xrint nsimd_sleef_rint_sse2_f64 +#define xrintf nsimd_sleef_rint_sse2_f32 +#define xnextafter nsimd_sleef_nextafter_sse2_f64 +#define xnextafterf nsimd_sleef_nextafter_sse2_f32 +#define xfrfrexp nsimd_sleef_frfrexp_sse2_f64 +#define xfrfrexpf nsimd_sleef_frfrexp_sse2_f32 +#define xexpfrexp nsimd_sleef_expfrexp_sse2_f64 +#define xexpfrexpf nsimd_sleef_expfrexp_sse2_f32 +#define xfmod nsimd_sleef_fmod_sse2_f64 +#define xfmodf nsimd_sleef_fmod_sse2_f32 +#define xremainder nsimd_sleef_remainder_sse2_f64 +#define xremainderf nsimd_sleef_remainder_sse2_f32 +#define xmodf nsimd_sleef_modf_sse2_f64 +#define xmodff nsimd_sleef_modf_sse2_f32 +#define xlgamma_u1 nsimd_sleef_lgamma_u10_sse2_f64 +#define xlgammaf_u1 nsimd_sleef_lgamma_u10_sse2_f32 +#define xtgamma_u1 nsimd_sleef_tgamma_u10_sse2_f64 +#define xtgammaf_u1 nsimd_sleef_tgamma_u10_sse2_f32 +#define xerf_u1 nsimd_sleef_erf_u10_sse2_f64 +#define xerff_u1 nsimd_sleef_erf_u10_sse2_f32 +#define xerfc_u15 nsimd_sleef_erfc_u15_sse2_f64 +#define xerfcf_u15 nsimd_sleef_erfc_u15_sse2_f32 +#define xgetInt nsimd_sleef_getInt_sse2_f64 +#define xgetIntf nsimd_sleef_getInt_sse2_f32 +#define xgetPtr nsimd_sleef_getPtr_sse2_f64 +#define xgetPtrf nsimd_sleef_getPtr_sse2_f32 + + #endif + + #define rempi nsimd_sleef_rempi_sse2 + #define rempif nsimd_sleef_rempif_sse2 + #define rempisub nsimd_sleef_rempisub_sse2 + #define rempisubf 
nsimd_sleef_rempisubf_sse2 + #define gammak nsimd_gammak_sse2 + #define gammafk nsimd_gammafk_sse2 + + #endif + + + +#endif + diff --git a/src/renamesse4.h b/src/renamesse4.h new file mode 100644 index 00000000..a3b8b08b --- /dev/null +++ b/src/renamesse4.h @@ -0,0 +1,337 @@ +#ifndef RENAMESSE4_H + #define RENAMESSE4_H + + /* ------------------------------------------------------------------------- */ + /* Naming of functions sse42 */ + + #ifdef NSIMD_SSE42 + + #ifdef DETERMINISTIC + + #define xsin nsimd_sleef_sin_u35d_sse42_f64 +#define xsinf nsimd_sleef_sin_u35d_sse42_f32 +#define xcos nsimd_sleef_cos_u35d_sse42_f64 +#define xcosf nsimd_sleef_cos_u35d_sse42_f32 +#define xsincos nsimd_sleef_sincos_u35d_sse42_f64 +#define xsincosf nsimd_sleef_sincos_u35d_sse42_f32 +#define xtan nsimd_sleef_tan_u35d_sse42_f64 +#define xtanf nsimd_sleef_tan_u35d_sse42_f32 +#define xasin nsimd_sleef_asin_u35d_sse42_f64 +#define xasinf nsimd_sleef_asin_u35d_sse42_f32 +#define xacos nsimd_sleef_acos_u35d_sse42_f64 +#define xacosf nsimd_sleef_acos_u35d_sse42_f32 +#define xatan nsimd_sleef_atan_u35d_sse42_f64 +#define xatanf nsimd_sleef_atan_u35d_sse42_f32 +#define xatan2 nsimd_sleef_atan2_u35d_sse42_f64 +#define xatan2f nsimd_sleef_atan2_u35d_sse42_f32 +#define xlog nsimd_sleef_log_u35d_sse42_f64 +#define xlogf nsimd_sleef_log_u35d_sse42_f32 +#define xcbrt nsimd_sleef_cbrt_u35d_sse42_f64 +#define xcbrtf nsimd_sleef_cbrt_u35d_sse42_f32 +#define xsin_u1 nsimd_sleef_sin_u10d_sse42_f64 +#define xsinf_u1 nsimd_sleef_sin_u10d_sse42_f32 +#define xcos_u1 nsimd_sleef_cos_u10d_sse42_f64 +#define xcosf_u1 nsimd_sleef_cos_u10d_sse42_f32 +#define xsincos_u1 nsimd_sleef_sincos_u10d_sse42_f64 +#define xsincosf_u1 nsimd_sleef_sincos_u10d_sse42_f32 +#define xtan_u1 nsimd_sleef_tan_u10d_sse42_f64 +#define xtanf_u1 nsimd_sleef_tan_u10d_sse42_f32 +#define xasin_u1 nsimd_sleef_asin_u10d_sse42_f64 +#define xasinf_u1 nsimd_sleef_asin_u10d_sse42_f32 +#define xacos_u1 nsimd_sleef_acos_u10d_sse42_f64 +#define xacosf_u1 nsimd_sleef_acos_u10d_sse42_f32 +#define xatan_u1 nsimd_sleef_atan_u10d_sse42_f64 +#define xatanf_u1 nsimd_sleef_atan_u10d_sse42_f32 +#define xatan2_u1 nsimd_sleef_atan2_u10d_sse42_f64 +#define xatan2f_u1 nsimd_sleef_atan2_u10d_sse42_f32 +#define xlog_u1 nsimd_sleef_log_u10d_sse42_f64 +#define xlogf_u1 nsimd_sleef_log_u10d_sse42_f32 +#define xcbrt_u1 nsimd_sleef_cbrt_u10d_sse42_f64 +#define xcbrtf_u1 nsimd_sleef_cbrt_u10d_sse42_f32 +#define xexp nsimd_sleef_exp_u10d_sse42_f64 +#define xexpf nsimd_sleef_exp_u10d_sse42_f32 +#define xpow nsimd_sleef_pow_u10d_sse42_f64 +#define xpowf nsimd_sleef_pow_u10d_sse42_f32 +#define xsinh nsimd_sleef_sinh_u10d_sse42_f64 +#define xsinhf nsimd_sleef_sinh_u10d_sse42_f32 +#define xcosh nsimd_sleef_cosh_u10d_sse42_f64 +#define xcoshf nsimd_sleef_cosh_u10d_sse42_f32 +#define xtanh nsimd_sleef_tanh_u10d_sse42_f64 +#define xtanhf nsimd_sleef_tanh_u10d_sse42_f32 +#define xsinh_u35 nsimd_sleef_sinh_u35d_sse42_f64 +#define xsinhf_u35 nsimd_sleef_sinh_u35d_sse42_f32 +#define xcosh_u35 nsimd_sleef_cosh_u35d_sse42_f64 +#define xcoshf_u35 nsimd_sleef_cosh_u35d_sse42_f32 +#define xtanh_u35 nsimd_sleef_tanh_u35d_sse42_f64 +#define xtanhf_u35 nsimd_sleef_tanh_u35d_sse42_f32 +#define xfastsin_u3500 nsimd_sleef_fastsin_u3500d_sse42_f64 +#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500d_sse42_f32 +#define xfastcos_u3500 nsimd_sleef_fastcos_u3500d_sse42_f64 +#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500d_sse42_f32 +#define xfastpow_u3500 nsimd_sleef_fastpow_u3500d_sse42_f64 +#define xfastpowf_u3500 
nsimd_sleef_fastpow_u3500d_sse42_f32 +#define xasinh nsimd_sleef_asinh_u10d_sse42_f64 +#define xasinhf nsimd_sleef_asinh_u10d_sse42_f32 +#define xacosh nsimd_sleef_acosh_u10d_sse42_f64 +#define xacoshf nsimd_sleef_acosh_u10d_sse42_f32 +#define xatanh nsimd_sleef_atanh_u10d_sse42_f64 +#define xatanhf nsimd_sleef_atanh_u10d_sse42_f32 +#define xexp2 nsimd_sleef_exp2_u10d_sse42_f64 +#define xexp2f nsimd_sleef_exp2_u10d_sse42_f32 +#define xexp2_u35 nsimd_sleef_exp2_u35d_sse42_f64 +#define xexp2f_u35 nsimd_sleef_exp2_u35d_sse42_f32 +#define xexp10 nsimd_sleef_exp10_u10d_sse42_f64 +#define xexp10f nsimd_sleef_exp10_u10d_sse42_f32 +#define xexp10_u35 nsimd_sleef_exp10_u35d_sse42_f64 +#define xexp10f_u35 nsimd_sleef_exp10_u35d_sse42_f32 +#define xexpm1 nsimd_sleef_expm1_u10d_sse42_f64 +#define xexpm1f nsimd_sleef_expm1_u10d_sse42_f32 +#define xlog10 nsimd_sleef_log10_u10d_sse42_f64 +#define xlog10f nsimd_sleef_log10_u10d_sse42_f32 +#define xlog2 nsimd_sleef_log2_u10d_sse42_f64 +#define xlog2f nsimd_sleef_log2_u10d_sse42_f32 +#define xlog2_u35 nsimd_sleef_log2_u35d_sse42_f64 +#define xlog2f_u35 nsimd_sleef_log2_u35d_sse42_f32 +#define xlog1p nsimd_sleef_log1p_u10d_sse42_f64 +#define xlog1pf nsimd_sleef_log1p_u10d_sse42_f32 +#define xsincospi_u05 nsimd_sleef_sincospi_u05d_sse42_f64 +#define xsincospif_u05 nsimd_sleef_sincospi_u05d_sse42_f32 +#define xsincospi_u35 nsimd_sleef_sincospi_u35d_sse42_f64 +#define xsincospif_u35 nsimd_sleef_sincospi_u35d_sse42_f32 +#define xsinpi_u05 nsimd_sleef_sinpi_u05d_sse42_f64 +#define xsinpif_u05 nsimd_sleef_sinpi_u05d_sse42_f32 +#define xcospi_u05 nsimd_sleef_cospi_u05d_sse42_f64 +#define xcospif_u05 nsimd_sleef_cospi_u05d_sse42_f32 +#define xldexp nsimd_sleef_ldexp_sse42_f64 +#define xldexpf nsimd_sleef_ldexp_sse42_f32 +#define xilogb nsimd_sleef_ilogb_sse42_f64 +#define xilogbf nsimd_sleef_ilogb_sse42_f32 +#define xfma nsimd_sleef_fma_sse42_f64 +#define xfmaf nsimd_sleef_fma_sse42_f32 +#define xsqrt nsimd_sleef_sqrt_sse42_f64 +#define xsqrtf nsimd_sleef_sqrt_sse42_f32 +#define xsqrt_u05 nsimd_sleef_sqrt_u05d_sse42_f64 +#define xsqrtf_u05 nsimd_sleef_sqrt_u05d_sse42_f32 +#define xsqrt_u35 nsimd_sleef_sqrt_u35d_sse42_f64 +#define xsqrtf_u35 nsimd_sleef_sqrt_u35d_sse42_f32 +#define xhypot_u05 nsimd_sleef_hypot_u05d_sse42_f64 +#define xhypotf_u05 nsimd_sleef_hypot_u05d_sse42_f32 +#define xhypot_u35 nsimd_sleef_hypot_u35d_sse42_f64 +#define xhypotf_u35 nsimd_sleef_hypot_u35d_sse42_f32 +#define xfabs nsimd_sleef_fabs_sse42_f64 +#define xfabsf nsimd_sleef_fabs_sse42_f32 +#define xcopysign nsimd_sleef_copysign_sse42_f64 +#define xcopysignf nsimd_sleef_copysign_sse42_f32 +#define xfmax nsimd_sleef_fmax_sse42_f64 +#define xfmaxf nsimd_sleef_fmax_sse42_f32 +#define xfmin nsimd_sleef_fmin_sse42_f64 +#define xfminf nsimd_sleef_fmin_sse42_f32 +#define xfdim nsimd_sleef_fdim_sse42_f64 +#define xfdimf nsimd_sleef_fdim_sse42_f32 +#define xtrunc nsimd_sleef_trunc_sse42_f64 +#define xtruncf nsimd_sleef_trunc_sse42_f32 +#define xfloor nsimd_sleef_floor_sse42_f64 +#define xfloorf nsimd_sleef_floor_sse42_f32 +#define xceil nsimd_sleef_ceil_sse42_f64 +#define xceilf nsimd_sleef_ceil_sse42_f32 +#define xround nsimd_sleef_round_sse42_f64 +#define xroundf nsimd_sleef_round_sse42_f32 +#define xrint nsimd_sleef_rint_sse42_f64 +#define xrintf nsimd_sleef_rint_sse42_f32 +#define xnextafter nsimd_sleef_nextafter_sse42_f64 +#define xnextafterf nsimd_sleef_nextafter_sse42_f32 +#define xfrfrexp nsimd_sleef_frfrexp_sse42_f64 +#define xfrfrexpf nsimd_sleef_frfrexp_sse42_f32 +#define xexpfrexp 
nsimd_sleef_expfrexp_sse42_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_sse42_f32
+#define xfmod nsimd_sleef_fmod_sse42_f64
+#define xfmodf nsimd_sleef_fmod_sse42_f32
+#define xremainder nsimd_sleef_remainder_sse42_f64
+#define xremainderf nsimd_sleef_remainder_sse42_f32
+#define xmodf nsimd_sleef_modf_sse42_f64
+#define xmodff nsimd_sleef_modf_sse42_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10d_sse42_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10d_sse42_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10d_sse42_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10d_sse42_f32
+#define xerf_u1 nsimd_sleef_erf_u10d_sse42_f64
+#define xerff_u1 nsimd_sleef_erf_u10d_sse42_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15d_sse42_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15d_sse42_f32
+#define xgetInt nsimd_sleef_getInt_sse42_f64
+#define xgetIntf nsimd_sleef_getInt_sse42_f32
+#define xgetPtr nsimd_sleef_getPtr_sse42_f64
+#define xgetPtrf nsimd_sleef_getPtr_sse42_f32
+
+ #else
+
+ #define xsin nsimd_sleef_sin_u35_sse42_f64
+#define xsinf nsimd_sleef_sin_u35_sse42_f32
+#define xcos nsimd_sleef_cos_u35_sse42_f64
+#define xcosf nsimd_sleef_cos_u35_sse42_f32
+#define xsincos nsimd_sleef_sincos_u35_sse42_f64
+#define xsincosf nsimd_sleef_sincos_u35_sse42_f32
+#define xtan nsimd_sleef_tan_u35_sse42_f64
+#define xtanf nsimd_sleef_tan_u35_sse42_f32
+#define xasin nsimd_sleef_asin_u35_sse42_f64
+#define xasinf nsimd_sleef_asin_u35_sse42_f32
+#define xacos nsimd_sleef_acos_u35_sse42_f64
+#define xacosf nsimd_sleef_acos_u35_sse42_f32
+#define xatan nsimd_sleef_atan_u35_sse42_f64
+#define xatanf nsimd_sleef_atan_u35_sse42_f32
+#define xatan2 nsimd_sleef_atan2_u35_sse42_f64
+#define xatan2f nsimd_sleef_atan2_u35_sse42_f32
+#define xlog nsimd_sleef_log_u35_sse42_f64
+#define xlogf nsimd_sleef_log_u35_sse42_f32
+#define xcbrt nsimd_sleef_cbrt_u35_sse42_f64
+#define xcbrtf nsimd_sleef_cbrt_u35_sse42_f32
+#define xsin_u1 nsimd_sleef_sin_u10_sse42_f64
+#define xsinf_u1 nsimd_sleef_sin_u10_sse42_f32
+#define xcos_u1 nsimd_sleef_cos_u10_sse42_f64
+#define xcosf_u1 nsimd_sleef_cos_u10_sse42_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10_sse42_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10_sse42_f32
+#define xtan_u1 nsimd_sleef_tan_u10_sse42_f64
+#define xtanf_u1 nsimd_sleef_tan_u10_sse42_f32
+#define xasin_u1 nsimd_sleef_asin_u10_sse42_f64
+#define xasinf_u1 nsimd_sleef_asin_u10_sse42_f32
+#define xacos_u1 nsimd_sleef_acos_u10_sse42_f64
+#define xacosf_u1 nsimd_sleef_acos_u10_sse42_f32
+#define xatan_u1 nsimd_sleef_atan_u10_sse42_f64
+#define xatanf_u1 nsimd_sleef_atan_u10_sse42_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10_sse42_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10_sse42_f32
+#define xlog_u1 nsimd_sleef_log_u10_sse42_f64
+#define xlogf_u1 nsimd_sleef_log_u10_sse42_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10_sse42_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10_sse42_f32
+#define xexp nsimd_sleef_exp_u10_sse42_f64
+#define xexpf nsimd_sleef_exp_u10_sse42_f32
+#define xpow nsimd_sleef_pow_u10_sse42_f64
+#define xpowf nsimd_sleef_pow_u10_sse42_f32
+#define xsinh nsimd_sleef_sinh_u10_sse42_f64
+#define xsinhf nsimd_sleef_sinh_u10_sse42_f32
+#define xcosh nsimd_sleef_cosh_u10_sse42_f64
+#define xcoshf nsimd_sleef_cosh_u10_sse42_f32
+#define xtanh nsimd_sleef_tanh_u10_sse42_f64
+#define xtanhf nsimd_sleef_tanh_u10_sse42_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35_sse42_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35_sse42_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35_sse42_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35_sse42_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35_sse42_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35_sse42_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500_sse42_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500_sse42_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500_sse42_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500_sse42_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500_sse42_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500_sse42_f32
+#define xasinh nsimd_sleef_asinh_u10_sse42_f64
+#define xasinhf nsimd_sleef_asinh_u10_sse42_f32
+#define xacosh nsimd_sleef_acosh_u10_sse42_f64
+#define xacoshf nsimd_sleef_acosh_u10_sse42_f32
+#define xatanh nsimd_sleef_atanh_u10_sse42_f64
+#define xatanhf nsimd_sleef_atanh_u10_sse42_f32
+#define xexp2 nsimd_sleef_exp2_u10_sse42_f64
+#define xexp2f nsimd_sleef_exp2_u10_sse42_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35_sse42_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35_sse42_f32
+#define xexp10 nsimd_sleef_exp10_u10_sse42_f64
+#define xexp10f nsimd_sleef_exp10_u10_sse42_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35_sse42_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35_sse42_f32
+#define xexpm1 nsimd_sleef_expm1_u10_sse42_f64
+#define xexpm1f nsimd_sleef_expm1_u10_sse42_f32
+#define xlog10 nsimd_sleef_log10_u10_sse42_f64
+#define xlog10f nsimd_sleef_log10_u10_sse42_f32
+#define xlog2 nsimd_sleef_log2_u10_sse42_f64
+#define xlog2f nsimd_sleef_log2_u10_sse42_f32
+#define xlog2_u35 nsimd_sleef_log2_u35_sse42_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35_sse42_f32
+#define xlog1p nsimd_sleef_log1p_u10_sse42_f64
+#define xlog1pf nsimd_sleef_log1p_u10_sse42_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05_sse42_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05_sse42_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35_sse42_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35_sse42_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05_sse42_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05_sse42_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05_sse42_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05_sse42_f32
+#define xldexp nsimd_sleef_ldexp_sse42_f64
+#define xldexpf nsimd_sleef_ldexp_sse42_f32
+#define xilogb nsimd_sleef_ilogb_sse42_f64
+#define xilogbf nsimd_sleef_ilogb_sse42_f32
+#define xfma nsimd_sleef_fma_sse42_f64
+#define xfmaf nsimd_sleef_fma_sse42_f32
+#define xsqrt nsimd_sleef_sqrt_sse42_f64
+#define xsqrtf nsimd_sleef_sqrt_sse42_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05_sse42_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05_sse42_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35_sse42_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35_sse42_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05_sse42_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05_sse42_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35_sse42_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35_sse42_f32
+#define xfabs nsimd_sleef_fabs_sse42_f64
+#define xfabsf nsimd_sleef_fabs_sse42_f32
+#define xcopysign nsimd_sleef_copysign_sse42_f64
+#define xcopysignf nsimd_sleef_copysign_sse42_f32
+#define xfmax nsimd_sleef_fmax_sse42_f64
+#define xfmaxf nsimd_sleef_fmax_sse42_f32
+#define xfmin nsimd_sleef_fmin_sse42_f64
+#define xfminf nsimd_sleef_fmin_sse42_f32
+#define xfdim nsimd_sleef_fdim_sse42_f64
+#define xfdimf nsimd_sleef_fdim_sse42_f32
+#define xtrunc nsimd_sleef_trunc_sse42_f64
+#define xtruncf nsimd_sleef_trunc_sse42_f32
+#define xfloor nsimd_sleef_floor_sse42_f64
+#define xfloorf nsimd_sleef_floor_sse42_f32
+#define xceil nsimd_sleef_ceil_sse42_f64
+#define xceilf nsimd_sleef_ceil_sse42_f32
+#define xround nsimd_sleef_round_sse42_f64
+#define xroundf nsimd_sleef_round_sse42_f32
+#define xrint nsimd_sleef_rint_sse42_f64
+#define xrintf nsimd_sleef_rint_sse42_f32
+#define xnextafter nsimd_sleef_nextafter_sse42_f64
+#define xnextafterf nsimd_sleef_nextafter_sse42_f32
+#define xfrfrexp nsimd_sleef_frfrexp_sse42_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_sse42_f32
+#define xexpfrexp nsimd_sleef_expfrexp_sse42_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_sse42_f32
+#define xfmod nsimd_sleef_fmod_sse42_f64
+#define xfmodf nsimd_sleef_fmod_sse42_f32
+#define xremainder nsimd_sleef_remainder_sse42_f64
+#define xremainderf nsimd_sleef_remainder_sse42_f32
+#define xmodf nsimd_sleef_modf_sse42_f64
+#define xmodff nsimd_sleef_modf_sse42_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10_sse42_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10_sse42_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10_sse42_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10_sse42_f32
+#define xerf_u1 nsimd_sleef_erf_u10_sse42_f64
+#define xerff_u1 nsimd_sleef_erf_u10_sse42_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15_sse42_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15_sse42_f32
+#define xgetInt nsimd_sleef_getInt_sse42_f64
+#define xgetIntf nsimd_sleef_getInt_sse42_f32
+#define xgetPtr nsimd_sleef_getPtr_sse42_f64
+#define xgetPtrf nsimd_sleef_getPtr_sse42_f32
+
+ #endif
+
+ #define rempi nsimd_sleef_rempi_sse42
+ #define rempif nsimd_sleef_rempif_sse42
+ #define rempisub nsimd_sleef_rempisub_sse42
+ #define rempisubf nsimd_sleef_rempisubf_sse42
+ #define gammak nsimd_gammak_sse42
+ #define gammafk nsimd_gammafk_sse42
+
+ #endif
+
+
+#endif
+
diff --git a/src/renamesve.h b/src/renamesve.h
new file mode 100644
index 00000000..0090869f
--- /dev/null
+++ b/src/renamesve.h
@@ -0,0 +1,1657 @@
+#ifndef RENAMESVE_H
+ #define RENAMESVE_H
+
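+ /* This header renames SLEEF's internal x* entry points (xsin, xsinf,
+    xcos, ...) to the NSIMD-prefixed, extension-specific symbols exported
+    by the library, one block per SVE vector length (NSIMD_SVE128,
+    NSIMD_SVE256, ...). The uNN suffix encodes the maximum error in tenths
+    of an ULP (u05 = 0.5, u10 = 1.0, u15 = 1.5, u35 = 3.5, u3500 = 350);
+    defining DETERMINISTIC selects the *d variants, intended to give
+    reproducible results at some cost in speed. Exactly rounded operations
+    (ldexp, fma, fabs, fmin, ...) have a single variant and no suffix. */
+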
+ /* ------------------------------------------------------------------------- */
+ /* Naming of functions sve128 */
+
+ #ifdef NSIMD_SVE128
+
+ #ifdef DETERMINISTIC
+
+ #define xsin nsimd_sleef_sin_u35d_sve128_f64
+#define xsinf nsimd_sleef_sin_u35d_sve128_f32
+#define xcos nsimd_sleef_cos_u35d_sve128_f64
+#define xcosf nsimd_sleef_cos_u35d_sve128_f32
+#define xsincos nsimd_sleef_sincos_u35d_sve128_f64
+#define xsincosf nsimd_sleef_sincos_u35d_sve128_f32
+#define xtan nsimd_sleef_tan_u35d_sve128_f64
+#define xtanf nsimd_sleef_tan_u35d_sve128_f32
+#define xasin nsimd_sleef_asin_u35d_sve128_f64
+#define xasinf nsimd_sleef_asin_u35d_sve128_f32
+#define xacos nsimd_sleef_acos_u35d_sve128_f64
+#define xacosf nsimd_sleef_acos_u35d_sve128_f32
+#define xatan nsimd_sleef_atan_u35d_sve128_f64
+#define xatanf nsimd_sleef_atan_u35d_sve128_f32
+#define xatan2 nsimd_sleef_atan2_u35d_sve128_f64
+#define xatan2f nsimd_sleef_atan2_u35d_sve128_f32
+#define xlog nsimd_sleef_log_u35d_sve128_f64
+#define xlogf nsimd_sleef_log_u35d_sve128_f32
+#define xcbrt nsimd_sleef_cbrt_u35d_sve128_f64
+#define xcbrtf nsimd_sleef_cbrt_u35d_sve128_f32
+#define xsin_u1 nsimd_sleef_sin_u10d_sve128_f64
+#define xsinf_u1 nsimd_sleef_sin_u10d_sve128_f32
+#define xcos_u1 nsimd_sleef_cos_u10d_sve128_f64
+#define xcosf_u1 nsimd_sleef_cos_u10d_sve128_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10d_sve128_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10d_sve128_f32
+#define xtan_u1 nsimd_sleef_tan_u10d_sve128_f64
+#define xtanf_u1 nsimd_sleef_tan_u10d_sve128_f32
+#define xasin_u1 nsimd_sleef_asin_u10d_sve128_f64
+#define xasinf_u1 nsimd_sleef_asin_u10d_sve128_f32
+#define xacos_u1 nsimd_sleef_acos_u10d_sve128_f64
+#define xacosf_u1 nsimd_sleef_acos_u10d_sve128_f32
+#define xatan_u1 nsimd_sleef_atan_u10d_sve128_f64
+#define xatanf_u1 nsimd_sleef_atan_u10d_sve128_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10d_sve128_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10d_sve128_f32
+#define xlog_u1 nsimd_sleef_log_u10d_sve128_f64
+#define xlogf_u1 nsimd_sleef_log_u10d_sve128_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10d_sve128_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10d_sve128_f32
+#define xexp nsimd_sleef_exp_u10d_sve128_f64
+#define xexpf nsimd_sleef_exp_u10d_sve128_f32
+#define xpow nsimd_sleef_pow_u10d_sve128_f64
+#define xpowf nsimd_sleef_pow_u10d_sve128_f32
+#define xsinh nsimd_sleef_sinh_u10d_sve128_f64
+#define xsinhf nsimd_sleef_sinh_u10d_sve128_f32
+#define xcosh nsimd_sleef_cosh_u10d_sve128_f64
+#define xcoshf nsimd_sleef_cosh_u10d_sve128_f32
+#define xtanh nsimd_sleef_tanh_u10d_sve128_f64
+#define xtanhf nsimd_sleef_tanh_u10d_sve128_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35d_sve128_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35d_sve128_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35d_sve128_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35d_sve128_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35d_sve128_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35d_sve128_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500d_sve128_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500d_sve128_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500d_sve128_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500d_sve128_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500d_sve128_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500d_sve128_f32
+#define xasinh nsimd_sleef_asinh_u10d_sve128_f64
+#define xasinhf nsimd_sleef_asinh_u10d_sve128_f32
+#define xacosh nsimd_sleef_acosh_u10d_sve128_f64
+#define xacoshf nsimd_sleef_acosh_u10d_sve128_f32
+#define xatanh nsimd_sleef_atanh_u10d_sve128_f64
+#define xatanhf nsimd_sleef_atanh_u10d_sve128_f32
+#define xexp2 nsimd_sleef_exp2_u10d_sve128_f64
+#define xexp2f nsimd_sleef_exp2_u10d_sve128_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35d_sve128_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35d_sve128_f32
+#define xexp10 nsimd_sleef_exp10_u10d_sve128_f64
+#define xexp10f nsimd_sleef_exp10_u10d_sve128_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35d_sve128_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35d_sve128_f32
+#define xexpm1 nsimd_sleef_expm1_u10d_sve128_f64
+#define xexpm1f nsimd_sleef_expm1_u10d_sve128_f32
+#define xlog10 nsimd_sleef_log10_u10d_sve128_f64
+#define xlog10f nsimd_sleef_log10_u10d_sve128_f32
+#define xlog2 nsimd_sleef_log2_u10d_sve128_f64
+#define xlog2f nsimd_sleef_log2_u10d_sve128_f32
+#define xlog2_u35 nsimd_sleef_log2_u35d_sve128_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35d_sve128_f32
+#define xlog1p nsimd_sleef_log1p_u10d_sve128_f64
+#define xlog1pf nsimd_sleef_log1p_u10d_sve128_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05d_sve128_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05d_sve128_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35d_sve128_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35d_sve128_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05d_sve128_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05d_sve128_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05d_sve128_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05d_sve128_f32
+#define xldexp nsimd_sleef_ldexp_sve128_f64
+#define xldexpf nsimd_sleef_ldexp_sve128_f32
+#define xilogb nsimd_sleef_ilogb_sve128_f64
+#define xilogbf nsimd_sleef_ilogb_sve128_f32
+#define xfma nsimd_sleef_fma_sve128_f64
+#define xfmaf nsimd_sleef_fma_sve128_f32
+#define xsqrt nsimd_sleef_sqrt_sve128_f64
+#define xsqrtf nsimd_sleef_sqrt_sve128_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05d_sve128_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05d_sve128_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35d_sve128_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35d_sve128_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05d_sve128_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05d_sve128_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35d_sve128_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35d_sve128_f32
+#define xfabs nsimd_sleef_fabs_sve128_f64
+#define xfabsf nsimd_sleef_fabs_sve128_f32
+#define xcopysign nsimd_sleef_copysign_sve128_f64
+#define xcopysignf nsimd_sleef_copysign_sve128_f32
+#define xfmax nsimd_sleef_fmax_sve128_f64
+#define xfmaxf nsimd_sleef_fmax_sve128_f32
+#define xfmin nsimd_sleef_fmin_sve128_f64
+#define xfminf nsimd_sleef_fmin_sve128_f32
+#define xfdim nsimd_sleef_fdim_sve128_f64
+#define xfdimf nsimd_sleef_fdim_sve128_f32
+#define xtrunc nsimd_sleef_trunc_sve128_f64
+#define xtruncf nsimd_sleef_trunc_sve128_f32
+#define xfloor nsimd_sleef_floor_sve128_f64
+#define xfloorf nsimd_sleef_floor_sve128_f32
+#define xceil nsimd_sleef_ceil_sve128_f64
+#define xceilf nsimd_sleef_ceil_sve128_f32
+#define xround nsimd_sleef_round_sve128_f64
+#define xroundf nsimd_sleef_round_sve128_f32
+#define xrint nsimd_sleef_rint_sve128_f64
+#define xrintf nsimd_sleef_rint_sve128_f32
+#define xnextafter nsimd_sleef_nextafter_sve128_f64
+#define xnextafterf nsimd_sleef_nextafter_sve128_f32
+#define xfrfrexp nsimd_sleef_frfrexp_sve128_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_sve128_f32
+#define xexpfrexp nsimd_sleef_expfrexp_sve128_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_sve128_f32
+#define xfmod nsimd_sleef_fmod_sve128_f64
+#define xfmodf nsimd_sleef_fmod_sve128_f32
+#define xremainder nsimd_sleef_remainder_sve128_f64
+#define xremainderf nsimd_sleef_remainder_sve128_f32
+#define xmodf nsimd_sleef_modf_sve128_f64
+#define xmodff nsimd_sleef_modf_sve128_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10d_sve128_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10d_sve128_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10d_sve128_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10d_sve128_f32
+#define xerf_u1 nsimd_sleef_erf_u10d_sve128_f64
+#define xerff_u1 nsimd_sleef_erf_u10d_sve128_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15d_sve128_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15d_sve128_f32
+#define xgetInt nsimd_sleef_getInt_sve128_f64
+#define xgetIntf nsimd_sleef_getInt_sve128_f32
+#define xgetPtr nsimd_sleef_getPtr_sve128_f64
+#define xgetPtrf nsimd_sleef_getPtr_sve128_f32
+
+ #else
+
+ #define xsin nsimd_sleef_sin_u35_sve128_f64
+#define xsinf nsimd_sleef_sin_u35_sve128_f32
+#define xcos nsimd_sleef_cos_u35_sve128_f64
+#define xcosf nsimd_sleef_cos_u35_sve128_f32
+#define xsincos nsimd_sleef_sincos_u35_sve128_f64
+#define xsincosf nsimd_sleef_sincos_u35_sve128_f32
+#define xtan nsimd_sleef_tan_u35_sve128_f64
+#define xtanf nsimd_sleef_tan_u35_sve128_f32
+#define xasin nsimd_sleef_asin_u35_sve128_f64
+#define xasinf nsimd_sleef_asin_u35_sve128_f32
+#define xacos nsimd_sleef_acos_u35_sve128_f64
+#define xacosf nsimd_sleef_acos_u35_sve128_f32
+#define xatan nsimd_sleef_atan_u35_sve128_f64
+#define xatanf nsimd_sleef_atan_u35_sve128_f32
+#define xatan2 nsimd_sleef_atan2_u35_sve128_f64
+#define xatan2f nsimd_sleef_atan2_u35_sve128_f32
+#define xlog nsimd_sleef_log_u35_sve128_f64
+#define xlogf nsimd_sleef_log_u35_sve128_f32
+#define xcbrt nsimd_sleef_cbrt_u35_sve128_f64
+#define xcbrtf nsimd_sleef_cbrt_u35_sve128_f32
+#define xsin_u1 nsimd_sleef_sin_u10_sve128_f64
+#define xsinf_u1 nsimd_sleef_sin_u10_sve128_f32
+#define xcos_u1 nsimd_sleef_cos_u10_sve128_f64
+#define xcosf_u1 nsimd_sleef_cos_u10_sve128_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10_sve128_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10_sve128_f32
+#define xtan_u1 nsimd_sleef_tan_u10_sve128_f64
+#define xtanf_u1 nsimd_sleef_tan_u10_sve128_f32
+#define xasin_u1 nsimd_sleef_asin_u10_sve128_f64
+#define xasinf_u1 nsimd_sleef_asin_u10_sve128_f32
+#define xacos_u1 nsimd_sleef_acos_u10_sve128_f64
+#define xacosf_u1 nsimd_sleef_acos_u10_sve128_f32
+#define xatan_u1 nsimd_sleef_atan_u10_sve128_f64
+#define xatanf_u1 nsimd_sleef_atan_u10_sve128_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10_sve128_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10_sve128_f32
+#define xlog_u1 nsimd_sleef_log_u10_sve128_f64
+#define xlogf_u1 nsimd_sleef_log_u10_sve128_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10_sve128_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10_sve128_f32
+#define xexp nsimd_sleef_exp_u10_sve128_f64
+#define xexpf nsimd_sleef_exp_u10_sve128_f32
+#define xpow nsimd_sleef_pow_u10_sve128_f64
+#define xpowf nsimd_sleef_pow_u10_sve128_f32
+#define xsinh nsimd_sleef_sinh_u10_sve128_f64
+#define xsinhf nsimd_sleef_sinh_u10_sve128_f32
+#define xcosh nsimd_sleef_cosh_u10_sve128_f64
+#define xcoshf nsimd_sleef_cosh_u10_sve128_f32
+#define xtanh nsimd_sleef_tanh_u10_sve128_f64
+#define xtanhf nsimd_sleef_tanh_u10_sve128_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35_sve128_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35_sve128_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35_sve128_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35_sve128_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35_sve128_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35_sve128_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500_sve128_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500_sve128_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500_sve128_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500_sve128_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500_sve128_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500_sve128_f32
+#define xasinh nsimd_sleef_asinh_u10_sve128_f64
+#define xasinhf nsimd_sleef_asinh_u10_sve128_f32
+#define xacosh nsimd_sleef_acosh_u10_sve128_f64
+#define xacoshf nsimd_sleef_acosh_u10_sve128_f32
+#define xatanh nsimd_sleef_atanh_u10_sve128_f64
+#define xatanhf nsimd_sleef_atanh_u10_sve128_f32
+#define xexp2 nsimd_sleef_exp2_u10_sve128_f64
+#define xexp2f nsimd_sleef_exp2_u10_sve128_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35_sve128_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35_sve128_f32
+#define xexp10 nsimd_sleef_exp10_u10_sve128_f64
+#define xexp10f nsimd_sleef_exp10_u10_sve128_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35_sve128_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35_sve128_f32
+#define xexpm1 nsimd_sleef_expm1_u10_sve128_f64
+#define xexpm1f nsimd_sleef_expm1_u10_sve128_f32
+#define xlog10 nsimd_sleef_log10_u10_sve128_f64
+#define xlog10f nsimd_sleef_log10_u10_sve128_f32
+#define xlog2 nsimd_sleef_log2_u10_sve128_f64
+#define xlog2f nsimd_sleef_log2_u10_sve128_f32
+#define xlog2_u35 nsimd_sleef_log2_u35_sve128_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35_sve128_f32
+#define xlog1p nsimd_sleef_log1p_u10_sve128_f64
+#define xlog1pf nsimd_sleef_log1p_u10_sve128_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05_sve128_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05_sve128_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35_sve128_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35_sve128_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05_sve128_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05_sve128_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05_sve128_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05_sve128_f32
+#define xldexp nsimd_sleef_ldexp_sve128_f64
+#define xldexpf nsimd_sleef_ldexp_sve128_f32
+#define xilogb nsimd_sleef_ilogb_sve128_f64
+#define xilogbf nsimd_sleef_ilogb_sve128_f32
+#define xfma nsimd_sleef_fma_sve128_f64
+#define xfmaf nsimd_sleef_fma_sve128_f32
+#define xsqrt nsimd_sleef_sqrt_sve128_f64
+#define xsqrtf nsimd_sleef_sqrt_sve128_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05_sve128_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05_sve128_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35_sve128_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35_sve128_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05_sve128_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05_sve128_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35_sve128_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35_sve128_f32
+#define xfabs nsimd_sleef_fabs_sve128_f64
+#define xfabsf nsimd_sleef_fabs_sve128_f32
+#define xcopysign nsimd_sleef_copysign_sve128_f64
+#define xcopysignf nsimd_sleef_copysign_sve128_f32
+#define xfmax nsimd_sleef_fmax_sve128_f64
+#define xfmaxf nsimd_sleef_fmax_sve128_f32
+#define xfmin nsimd_sleef_fmin_sve128_f64
+#define xfminf nsimd_sleef_fmin_sve128_f32
+#define xfdim nsimd_sleef_fdim_sve128_f64
+#define xfdimf nsimd_sleef_fdim_sve128_f32
+#define xtrunc nsimd_sleef_trunc_sve128_f64
+#define xtruncf nsimd_sleef_trunc_sve128_f32
+#define xfloor nsimd_sleef_floor_sve128_f64
+#define xfloorf nsimd_sleef_floor_sve128_f32
+#define xceil nsimd_sleef_ceil_sve128_f64
+#define xceilf nsimd_sleef_ceil_sve128_f32
+#define xround nsimd_sleef_round_sve128_f64
+#define xroundf nsimd_sleef_round_sve128_f32
+#define xrint nsimd_sleef_rint_sve128_f64
+#define xrintf nsimd_sleef_rint_sve128_f32
+#define xnextafter nsimd_sleef_nextafter_sve128_f64
+#define xnextafterf nsimd_sleef_nextafter_sve128_f32
+#define xfrfrexp nsimd_sleef_frfrexp_sve128_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_sve128_f32
+#define xexpfrexp nsimd_sleef_expfrexp_sve128_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_sve128_f32
+#define xfmod nsimd_sleef_fmod_sve128_f64
+#define xfmodf nsimd_sleef_fmod_sve128_f32
+#define xremainder nsimd_sleef_remainder_sve128_f64
+#define xremainderf nsimd_sleef_remainder_sve128_f32
+#define xmodf nsimd_sleef_modf_sve128_f64
+#define xmodff nsimd_sleef_modf_sve128_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10_sve128_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10_sve128_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10_sve128_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10_sve128_f32
+#define xerf_u1 nsimd_sleef_erf_u10_sve128_f64
+#define xerff_u1 nsimd_sleef_erf_u10_sve128_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15_sve128_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15_sve128_f32
+#define xgetInt nsimd_sleef_getInt_sve128_f64
+#define xgetIntf nsimd_sleef_getInt_sve128_f32
+#define xgetPtr nsimd_sleef_getPtr_sve128_f64
+#define xgetPtrf nsimd_sleef_getPtr_sve128_f32
+
+ #endif
+
+ #define rempi nsimd_sleef_rempi_sve128
+ #define rempif nsimd_sleef_rempif_sve128
+ #define rempisub nsimd_sleef_rempisub_sve128
+ #define rempisubf nsimd_sleef_rempisubf_sve128
+ #define gammak nsimd_gammak_sve128
+ #define gammafk nsimd_gammafk_sve128
+
+ #endif
+
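+ /* Typical use, as a minimal sketch (assuming the build described in
+    CMakeLists.txt, where sleefsimdsp.c / sleefsimddp.c are compiled once
+    per SIMD extension with the matching macro defined):
+
+      #define NSIMD_SVE128
+      #include "renamesve.h"
+
+    after which the generic SLEEF sources call xsin and actually define or
+    reference nsimd_sleef_sin_u35_sve128_f64 (or the u35d variant when
+    DETERMINISTIC is also defined). */
+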
+ /* ------------------------------------------------------------------------- */
+ /* Naming of functions sve256 */
+
+ #ifdef NSIMD_SVE256
+
+ #ifdef DETERMINISTIC
+
+ #define xsin nsimd_sleef_sin_u35d_sve256_f64
+#define xsinf nsimd_sleef_sin_u35d_sve256_f32
+#define xcos nsimd_sleef_cos_u35d_sve256_f64
+#define xcosf nsimd_sleef_cos_u35d_sve256_f32
+#define xsincos nsimd_sleef_sincos_u35d_sve256_f64
+#define xsincosf nsimd_sleef_sincos_u35d_sve256_f32
+#define xtan nsimd_sleef_tan_u35d_sve256_f64
+#define xtanf nsimd_sleef_tan_u35d_sve256_f32
+#define xasin nsimd_sleef_asin_u35d_sve256_f64
+#define xasinf nsimd_sleef_asin_u35d_sve256_f32
+#define xacos nsimd_sleef_acos_u35d_sve256_f64
+#define xacosf nsimd_sleef_acos_u35d_sve256_f32
+#define xatan nsimd_sleef_atan_u35d_sve256_f64
+#define xatanf nsimd_sleef_atan_u35d_sve256_f32
+#define xatan2 nsimd_sleef_atan2_u35d_sve256_f64
+#define xatan2f nsimd_sleef_atan2_u35d_sve256_f32
+#define xlog nsimd_sleef_log_u35d_sve256_f64
+#define xlogf nsimd_sleef_log_u35d_sve256_f32
+#define xcbrt nsimd_sleef_cbrt_u35d_sve256_f64
+#define xcbrtf nsimd_sleef_cbrt_u35d_sve256_f32
+#define xsin_u1 nsimd_sleef_sin_u10d_sve256_f64
+#define xsinf_u1 nsimd_sleef_sin_u10d_sve256_f32
+#define xcos_u1 nsimd_sleef_cos_u10d_sve256_f64
+#define xcosf_u1 nsimd_sleef_cos_u10d_sve256_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10d_sve256_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10d_sve256_f32
+#define xtan_u1 nsimd_sleef_tan_u10d_sve256_f64
+#define xtanf_u1 nsimd_sleef_tan_u10d_sve256_f32
+#define xasin_u1 nsimd_sleef_asin_u10d_sve256_f64
+#define xasinf_u1 nsimd_sleef_asin_u10d_sve256_f32
+#define xacos_u1 nsimd_sleef_acos_u10d_sve256_f64
+#define xacosf_u1 nsimd_sleef_acos_u10d_sve256_f32
+#define xatan_u1 nsimd_sleef_atan_u10d_sve256_f64
+#define xatanf_u1 nsimd_sleef_atan_u10d_sve256_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10d_sve256_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10d_sve256_f32
+#define xlog_u1 nsimd_sleef_log_u10d_sve256_f64
+#define xlogf_u1 nsimd_sleef_log_u10d_sve256_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10d_sve256_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10d_sve256_f32
+#define xexp nsimd_sleef_exp_u10d_sve256_f64
+#define xexpf nsimd_sleef_exp_u10d_sve256_f32
+#define xpow nsimd_sleef_pow_u10d_sve256_f64
+#define xpowf nsimd_sleef_pow_u10d_sve256_f32
+#define xsinh nsimd_sleef_sinh_u10d_sve256_f64
+#define xsinhf nsimd_sleef_sinh_u10d_sve256_f32
+#define xcosh nsimd_sleef_cosh_u10d_sve256_f64
+#define xcoshf nsimd_sleef_cosh_u10d_sve256_f32
+#define xtanh nsimd_sleef_tanh_u10d_sve256_f64
+#define xtanhf nsimd_sleef_tanh_u10d_sve256_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35d_sve256_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35d_sve256_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35d_sve256_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35d_sve256_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35d_sve256_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35d_sve256_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500d_sve256_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500d_sve256_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500d_sve256_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500d_sve256_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500d_sve256_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500d_sve256_f32
+#define xasinh nsimd_sleef_asinh_u10d_sve256_f64
+#define xasinhf nsimd_sleef_asinh_u10d_sve256_f32
+#define xacosh nsimd_sleef_acosh_u10d_sve256_f64
+#define xacoshf nsimd_sleef_acosh_u10d_sve256_f32
+#define xatanh nsimd_sleef_atanh_u10d_sve256_f64
+#define xatanhf nsimd_sleef_atanh_u10d_sve256_f32
+#define xexp2 nsimd_sleef_exp2_u10d_sve256_f64
+#define xexp2f nsimd_sleef_exp2_u10d_sve256_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35d_sve256_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35d_sve256_f32
+#define xexp10 nsimd_sleef_exp10_u10d_sve256_f64
+#define xexp10f nsimd_sleef_exp10_u10d_sve256_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35d_sve256_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35d_sve256_f32
+#define xexpm1 nsimd_sleef_expm1_u10d_sve256_f64
+#define xexpm1f nsimd_sleef_expm1_u10d_sve256_f32
+#define xlog10 nsimd_sleef_log10_u10d_sve256_f64
+#define xlog10f nsimd_sleef_log10_u10d_sve256_f32
+#define xlog2 nsimd_sleef_log2_u10d_sve256_f64
+#define xlog2f nsimd_sleef_log2_u10d_sve256_f32
+#define xlog2_u35 nsimd_sleef_log2_u35d_sve256_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35d_sve256_f32
+#define xlog1p nsimd_sleef_log1p_u10d_sve256_f64
+#define xlog1pf nsimd_sleef_log1p_u10d_sve256_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05d_sve256_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05d_sve256_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35d_sve256_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35d_sve256_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05d_sve256_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05d_sve256_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05d_sve256_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05d_sve256_f32
+#define xldexp nsimd_sleef_ldexp_sve256_f64
+#define xldexpf nsimd_sleef_ldexp_sve256_f32
+#define xilogb nsimd_sleef_ilogb_sve256_f64
+#define xilogbf nsimd_sleef_ilogb_sve256_f32
+#define xfma nsimd_sleef_fma_sve256_f64
+#define xfmaf nsimd_sleef_fma_sve256_f32
+#define xsqrt nsimd_sleef_sqrt_sve256_f64
+#define xsqrtf nsimd_sleef_sqrt_sve256_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05d_sve256_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05d_sve256_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35d_sve256_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35d_sve256_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05d_sve256_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05d_sve256_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35d_sve256_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35d_sve256_f32
+#define xfabs nsimd_sleef_fabs_sve256_f64
+#define xfabsf nsimd_sleef_fabs_sve256_f32
+#define xcopysign nsimd_sleef_copysign_sve256_f64
+#define xcopysignf nsimd_sleef_copysign_sve256_f32
+#define xfmax nsimd_sleef_fmax_sve256_f64
+#define xfmaxf nsimd_sleef_fmax_sve256_f32
+#define xfmin nsimd_sleef_fmin_sve256_f64
+#define xfminf nsimd_sleef_fmin_sve256_f32
+#define xfdim nsimd_sleef_fdim_sve256_f64
+#define xfdimf nsimd_sleef_fdim_sve256_f32
+#define xtrunc nsimd_sleef_trunc_sve256_f64
+#define xtruncf nsimd_sleef_trunc_sve256_f32
+#define xfloor nsimd_sleef_floor_sve256_f64
+#define xfloorf nsimd_sleef_floor_sve256_f32
+#define xceil nsimd_sleef_ceil_sve256_f64
+#define xceilf nsimd_sleef_ceil_sve256_f32
+#define xround nsimd_sleef_round_sve256_f64
+#define xroundf nsimd_sleef_round_sve256_f32
+#define xrint nsimd_sleef_rint_sve256_f64
+#define xrintf nsimd_sleef_rint_sve256_f32
+#define xnextafter nsimd_sleef_nextafter_sve256_f64
+#define xnextafterf nsimd_sleef_nextafter_sve256_f32
+#define xfrfrexp nsimd_sleef_frfrexp_sve256_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_sve256_f32
+#define xexpfrexp nsimd_sleef_expfrexp_sve256_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_sve256_f32
+#define xfmod nsimd_sleef_fmod_sve256_f64
+#define xfmodf nsimd_sleef_fmod_sve256_f32
+#define xremainder nsimd_sleef_remainder_sve256_f64
+#define xremainderf nsimd_sleef_remainder_sve256_f32
+#define xmodf nsimd_sleef_modf_sve256_f64
+#define xmodff nsimd_sleef_modf_sve256_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10d_sve256_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10d_sve256_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10d_sve256_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10d_sve256_f32
+#define xerf_u1 nsimd_sleef_erf_u10d_sve256_f64
+#define xerff_u1 nsimd_sleef_erf_u10d_sve256_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15d_sve256_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15d_sve256_f32
+#define xgetInt nsimd_sleef_getInt_sve256_f64
+#define xgetIntf nsimd_sleef_getInt_sve256_f32
+#define xgetPtr nsimd_sleef_getPtr_sve256_f64
+#define xgetPtrf nsimd_sleef_getPtr_sve256_f32
+
+ #else
+
+ #define xsin nsimd_sleef_sin_u35_sve256_f64
+#define xsinf nsimd_sleef_sin_u35_sve256_f32
+#define xcos nsimd_sleef_cos_u35_sve256_f64
+#define xcosf nsimd_sleef_cos_u35_sve256_f32
+#define xsincos nsimd_sleef_sincos_u35_sve256_f64
+#define xsincosf nsimd_sleef_sincos_u35_sve256_f32
+#define xtan nsimd_sleef_tan_u35_sve256_f64
+#define xtanf nsimd_sleef_tan_u35_sve256_f32
+#define xasin nsimd_sleef_asin_u35_sve256_f64
+#define xasinf nsimd_sleef_asin_u35_sve256_f32
+#define xacos nsimd_sleef_acos_u35_sve256_f64
+#define xacosf nsimd_sleef_acos_u35_sve256_f32
+#define xatan nsimd_sleef_atan_u35_sve256_f64
+#define xatanf nsimd_sleef_atan_u35_sve256_f32
+#define xatan2 nsimd_sleef_atan2_u35_sve256_f64
+#define xatan2f nsimd_sleef_atan2_u35_sve256_f32
+#define xlog nsimd_sleef_log_u35_sve256_f64
+#define xlogf nsimd_sleef_log_u35_sve256_f32
+#define xcbrt nsimd_sleef_cbrt_u35_sve256_f64
+#define xcbrtf nsimd_sleef_cbrt_u35_sve256_f32
+#define xsin_u1 nsimd_sleef_sin_u10_sve256_f64
+#define xsinf_u1 nsimd_sleef_sin_u10_sve256_f32
+#define xcos_u1 nsimd_sleef_cos_u10_sve256_f64
+#define xcosf_u1 nsimd_sleef_cos_u10_sve256_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10_sve256_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10_sve256_f32
+#define xtan_u1 nsimd_sleef_tan_u10_sve256_f64
+#define xtanf_u1 nsimd_sleef_tan_u10_sve256_f32
+#define xasin_u1 nsimd_sleef_asin_u10_sve256_f64
+#define xasinf_u1 nsimd_sleef_asin_u10_sve256_f32
+#define xacos_u1 nsimd_sleef_acos_u10_sve256_f64
+#define xacosf_u1 nsimd_sleef_acos_u10_sve256_f32
+#define xatan_u1 nsimd_sleef_atan_u10_sve256_f64
+#define xatanf_u1 nsimd_sleef_atan_u10_sve256_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10_sve256_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10_sve256_f32
+#define xlog_u1 nsimd_sleef_log_u10_sve256_f64
+#define xlogf_u1 nsimd_sleef_log_u10_sve256_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10_sve256_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10_sve256_f32
+#define xexp nsimd_sleef_exp_u10_sve256_f64
+#define xexpf nsimd_sleef_exp_u10_sve256_f32
+#define xpow nsimd_sleef_pow_u10_sve256_f64
+#define xpowf nsimd_sleef_pow_u10_sve256_f32
+#define xsinh nsimd_sleef_sinh_u10_sve256_f64
+#define xsinhf nsimd_sleef_sinh_u10_sve256_f32
+#define xcosh nsimd_sleef_cosh_u10_sve256_f64
+#define xcoshf nsimd_sleef_cosh_u10_sve256_f32
+#define xtanh nsimd_sleef_tanh_u10_sve256_f64
+#define xtanhf nsimd_sleef_tanh_u10_sve256_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35_sve256_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35_sve256_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35_sve256_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35_sve256_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35_sve256_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35_sve256_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500_sve256_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500_sve256_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500_sve256_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500_sve256_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500_sve256_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500_sve256_f32
+#define xasinh nsimd_sleef_asinh_u10_sve256_f64
+#define xasinhf nsimd_sleef_asinh_u10_sve256_f32
+#define xacosh nsimd_sleef_acosh_u10_sve256_f64
+#define xacoshf nsimd_sleef_acosh_u10_sve256_f32
+#define xatanh nsimd_sleef_atanh_u10_sve256_f64
+#define xatanhf nsimd_sleef_atanh_u10_sve256_f32
+#define xexp2 nsimd_sleef_exp2_u10_sve256_f64
+#define xexp2f nsimd_sleef_exp2_u10_sve256_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35_sve256_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35_sve256_f32
+#define xexp10 nsimd_sleef_exp10_u10_sve256_f64
+#define xexp10f nsimd_sleef_exp10_u10_sve256_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35_sve256_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35_sve256_f32
+#define xexpm1 nsimd_sleef_expm1_u10_sve256_f64
+#define xexpm1f nsimd_sleef_expm1_u10_sve256_f32
+#define xlog10 nsimd_sleef_log10_u10_sve256_f64
+#define xlog10f nsimd_sleef_log10_u10_sve256_f32
+#define xlog2 nsimd_sleef_log2_u10_sve256_f64
+#define xlog2f nsimd_sleef_log2_u10_sve256_f32
+#define xlog2_u35 nsimd_sleef_log2_u35_sve256_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35_sve256_f32
+#define xlog1p nsimd_sleef_log1p_u10_sve256_f64
+#define xlog1pf nsimd_sleef_log1p_u10_sve256_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05_sve256_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05_sve256_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35_sve256_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35_sve256_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05_sve256_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05_sve256_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05_sve256_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05_sve256_f32
+#define xldexp nsimd_sleef_ldexp_sve256_f64
+#define xldexpf nsimd_sleef_ldexp_sve256_f32
+#define xilogb nsimd_sleef_ilogb_sve256_f64
+#define xilogbf nsimd_sleef_ilogb_sve256_f32
+#define xfma nsimd_sleef_fma_sve256_f64
+#define xfmaf nsimd_sleef_fma_sve256_f32
+#define xsqrt nsimd_sleef_sqrt_sve256_f64
+#define xsqrtf nsimd_sleef_sqrt_sve256_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05_sve256_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05_sve256_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35_sve256_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35_sve256_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05_sve256_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05_sve256_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35_sve256_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35_sve256_f32
+#define xfabs nsimd_sleef_fabs_sve256_f64
+#define xfabsf nsimd_sleef_fabs_sve256_f32
+#define xcopysign nsimd_sleef_copysign_sve256_f64
+#define xcopysignf nsimd_sleef_copysign_sve256_f32
+#define xfmax nsimd_sleef_fmax_sve256_f64
+#define xfmaxf nsimd_sleef_fmax_sve256_f32
+#define xfmin nsimd_sleef_fmin_sve256_f64
+#define xfminf nsimd_sleef_fmin_sve256_f32
+#define xfdim nsimd_sleef_fdim_sve256_f64
+#define xfdimf nsimd_sleef_fdim_sve256_f32
+#define xtrunc nsimd_sleef_trunc_sve256_f64
+#define xtruncf nsimd_sleef_trunc_sve256_f32
+#define xfloor nsimd_sleef_floor_sve256_f64
+#define xfloorf nsimd_sleef_floor_sve256_f32
+#define xceil nsimd_sleef_ceil_sve256_f64
+#define xceilf nsimd_sleef_ceil_sve256_f32
+#define xround nsimd_sleef_round_sve256_f64
+#define xroundf nsimd_sleef_round_sve256_f32
+#define xrint nsimd_sleef_rint_sve256_f64
+#define xrintf nsimd_sleef_rint_sve256_f32
+#define xnextafter nsimd_sleef_nextafter_sve256_f64
+#define xnextafterf nsimd_sleef_nextafter_sve256_f32
+#define xfrfrexp nsimd_sleef_frfrexp_sve256_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_sve256_f32
+#define xexpfrexp nsimd_sleef_expfrexp_sve256_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_sve256_f32
+#define xfmod nsimd_sleef_fmod_sve256_f64
+#define xfmodf nsimd_sleef_fmod_sve256_f32
+#define xremainder nsimd_sleef_remainder_sve256_f64
+#define xremainderf nsimd_sleef_remainder_sve256_f32
+#define xmodf nsimd_sleef_modf_sve256_f64
+#define xmodff nsimd_sleef_modf_sve256_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10_sve256_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10_sve256_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10_sve256_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10_sve256_f32
+#define xerf_u1 nsimd_sleef_erf_u10_sve256_f64
+#define xerff_u1 nsimd_sleef_erf_u10_sve256_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15_sve256_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15_sve256_f32
+#define xgetInt nsimd_sleef_getInt_sve256_f64
+#define xgetIntf nsimd_sleef_getInt_sve256_f32
+#define xgetPtr nsimd_sleef_getPtr_sve256_f64
+#define xgetPtrf nsimd_sleef_getPtr_sve256_f32
+
+ #endif
+
+ #define rempi nsimd_sleef_rempi_sve256
+ #define rempif nsimd_sleef_rempif_sve256
+ #define rempisub nsimd_sleef_rempisub_sve256
+ #define rempisubf nsimd_sleef_rempisubf_sve256
+ #define gammak nsimd_gammak_sve256
+ #define gammafk nsimd_gammafk_sve256
+
+ #endif
+
+ /* ------------------------------------------------------------------------- */
+ /* Naming of functions sve512 */
+
+ #ifdef NSIMD_SVE512
+
+ #ifdef DETERMINISTIC
+
+ #define xsin nsimd_sleef_sin_u35d_sve512_f64
+#define xsinf nsimd_sleef_sin_u35d_sve512_f32
+#define xcos nsimd_sleef_cos_u35d_sve512_f64
+#define xcosf nsimd_sleef_cos_u35d_sve512_f32
+#define xsincos nsimd_sleef_sincos_u35d_sve512_f64
+#define xsincosf nsimd_sleef_sincos_u35d_sve512_f32
+#define xtan nsimd_sleef_tan_u35d_sve512_f64
+#define xtanf nsimd_sleef_tan_u35d_sve512_f32
+#define xasin nsimd_sleef_asin_u35d_sve512_f64
+#define xasinf nsimd_sleef_asin_u35d_sve512_f32
+#define xacos nsimd_sleef_acos_u35d_sve512_f64
+#define xacosf nsimd_sleef_acos_u35d_sve512_f32
+#define xatan nsimd_sleef_atan_u35d_sve512_f64
+#define xatanf nsimd_sleef_atan_u35d_sve512_f32
+#define xatan2 nsimd_sleef_atan2_u35d_sve512_f64
+#define xatan2f nsimd_sleef_atan2_u35d_sve512_f32
+#define xlog nsimd_sleef_log_u35d_sve512_f64
+#define xlogf nsimd_sleef_log_u35d_sve512_f32
+#define xcbrt nsimd_sleef_cbrt_u35d_sve512_f64
+#define xcbrtf nsimd_sleef_cbrt_u35d_sve512_f32
+#define xsin_u1 nsimd_sleef_sin_u10d_sve512_f64
+#define xsinf_u1 nsimd_sleef_sin_u10d_sve512_f32
+#define xcos_u1 nsimd_sleef_cos_u10d_sve512_f64
+#define xcosf_u1 nsimd_sleef_cos_u10d_sve512_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10d_sve512_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10d_sve512_f32
+#define xtan_u1 nsimd_sleef_tan_u10d_sve512_f64
+#define xtanf_u1 nsimd_sleef_tan_u10d_sve512_f32
+#define xasin_u1 nsimd_sleef_asin_u10d_sve512_f64
+#define xasinf_u1 nsimd_sleef_asin_u10d_sve512_f32
+#define xacos_u1 nsimd_sleef_acos_u10d_sve512_f64
+#define xacosf_u1 nsimd_sleef_acos_u10d_sve512_f32
+#define xatan_u1 nsimd_sleef_atan_u10d_sve512_f64
+#define xatanf_u1 nsimd_sleef_atan_u10d_sve512_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10d_sve512_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10d_sve512_f32
+#define xlog_u1 nsimd_sleef_log_u10d_sve512_f64
+#define xlogf_u1 nsimd_sleef_log_u10d_sve512_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10d_sve512_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10d_sve512_f32
+#define xexp nsimd_sleef_exp_u10d_sve512_f64
+#define xexpf nsimd_sleef_exp_u10d_sve512_f32
+#define xpow nsimd_sleef_pow_u10d_sve512_f64
+#define xpowf nsimd_sleef_pow_u10d_sve512_f32
+#define xsinh nsimd_sleef_sinh_u10d_sve512_f64
+#define xsinhf nsimd_sleef_sinh_u10d_sve512_f32
+#define xcosh nsimd_sleef_cosh_u10d_sve512_f64
+#define xcoshf nsimd_sleef_cosh_u10d_sve512_f32
+#define xtanh nsimd_sleef_tanh_u10d_sve512_f64
+#define xtanhf nsimd_sleef_tanh_u10d_sve512_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35d_sve512_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35d_sve512_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35d_sve512_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35d_sve512_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35d_sve512_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35d_sve512_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500d_sve512_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500d_sve512_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500d_sve512_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500d_sve512_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500d_sve512_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500d_sve512_f32
+#define xasinh nsimd_sleef_asinh_u10d_sve512_f64
+#define xasinhf nsimd_sleef_asinh_u10d_sve512_f32
+#define xacosh nsimd_sleef_acosh_u10d_sve512_f64
+#define xacoshf nsimd_sleef_acosh_u10d_sve512_f32
+#define xatanh nsimd_sleef_atanh_u10d_sve512_f64
+#define xatanhf nsimd_sleef_atanh_u10d_sve512_f32
+#define xexp2 nsimd_sleef_exp2_u10d_sve512_f64
+#define xexp2f nsimd_sleef_exp2_u10d_sve512_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35d_sve512_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35d_sve512_f32
+#define xexp10 nsimd_sleef_exp10_u10d_sve512_f64
+#define xexp10f nsimd_sleef_exp10_u10d_sve512_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35d_sve512_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35d_sve512_f32
+#define xexpm1 nsimd_sleef_expm1_u10d_sve512_f64
+#define xexpm1f nsimd_sleef_expm1_u10d_sve512_f32
+#define xlog10 nsimd_sleef_log10_u10d_sve512_f64
+#define xlog10f nsimd_sleef_log10_u10d_sve512_f32
+#define xlog2 nsimd_sleef_log2_u10d_sve512_f64
+#define xlog2f nsimd_sleef_log2_u10d_sve512_f32
+#define xlog2_u35 nsimd_sleef_log2_u35d_sve512_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35d_sve512_f32
+#define xlog1p nsimd_sleef_log1p_u10d_sve512_f64
+#define xlog1pf nsimd_sleef_log1p_u10d_sve512_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05d_sve512_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05d_sve512_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35d_sve512_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35d_sve512_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05d_sve512_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05d_sve512_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05d_sve512_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05d_sve512_f32
+#define xldexp nsimd_sleef_ldexp_sve512_f64
+#define xldexpf nsimd_sleef_ldexp_sve512_f32
+#define xilogb nsimd_sleef_ilogb_sve512_f64
+#define xilogbf nsimd_sleef_ilogb_sve512_f32
+#define xfma nsimd_sleef_fma_sve512_f64
+#define xfmaf nsimd_sleef_fma_sve512_f32
+#define xsqrt nsimd_sleef_sqrt_sve512_f64
+#define xsqrtf nsimd_sleef_sqrt_sve512_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05d_sve512_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05d_sve512_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35d_sve512_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35d_sve512_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05d_sve512_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05d_sve512_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35d_sve512_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35d_sve512_f32
+#define xfabs nsimd_sleef_fabs_sve512_f64
+#define xfabsf nsimd_sleef_fabs_sve512_f32
+#define xcopysign nsimd_sleef_copysign_sve512_f64
+#define xcopysignf nsimd_sleef_copysign_sve512_f32
+#define xfmax nsimd_sleef_fmax_sve512_f64
+#define xfmaxf nsimd_sleef_fmax_sve512_f32
+#define xfmin nsimd_sleef_fmin_sve512_f64
+#define xfminf nsimd_sleef_fmin_sve512_f32
+#define xfdim nsimd_sleef_fdim_sve512_f64
+#define xfdimf nsimd_sleef_fdim_sve512_f32
+#define xtrunc nsimd_sleef_trunc_sve512_f64
+#define xtruncf nsimd_sleef_trunc_sve512_f32
+#define xfloor nsimd_sleef_floor_sve512_f64
+#define xfloorf nsimd_sleef_floor_sve512_f32
+#define xceil nsimd_sleef_ceil_sve512_f64
+#define xceilf nsimd_sleef_ceil_sve512_f32
+#define xround nsimd_sleef_round_sve512_f64
+#define xroundf nsimd_sleef_round_sve512_f32
+#define xrint nsimd_sleef_rint_sve512_f64
+#define xrintf nsimd_sleef_rint_sve512_f32
+#define xnextafter nsimd_sleef_nextafter_sve512_f64
+#define xnextafterf nsimd_sleef_nextafter_sve512_f32
+#define xfrfrexp nsimd_sleef_frfrexp_sve512_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_sve512_f32
+#define xexpfrexp nsimd_sleef_expfrexp_sve512_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_sve512_f32
+#define xfmod nsimd_sleef_fmod_sve512_f64
+#define xfmodf nsimd_sleef_fmod_sve512_f32
+#define xremainder nsimd_sleef_remainder_sve512_f64
+#define xremainderf nsimd_sleef_remainder_sve512_f32
+#define xmodf nsimd_sleef_modf_sve512_f64
+#define xmodff nsimd_sleef_modf_sve512_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10d_sve512_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10d_sve512_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10d_sve512_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10d_sve512_f32
+#define xerf_u1 nsimd_sleef_erf_u10d_sve512_f64
+#define xerff_u1 nsimd_sleef_erf_u10d_sve512_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15d_sve512_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15d_sve512_f32
+#define xgetInt nsimd_sleef_getInt_sve512_f64
+#define xgetIntf nsimd_sleef_getInt_sve512_f32
+#define xgetPtr nsimd_sleef_getPtr_sve512_f64
+#define xgetPtrf nsimd_sleef_getPtr_sve512_f32
+
+ #else
+
+ #define xsin nsimd_sleef_sin_u35_sve512_f64
+#define xsinf nsimd_sleef_sin_u35_sve512_f32
+#define xcos nsimd_sleef_cos_u35_sve512_f64
+#define xcosf nsimd_sleef_cos_u35_sve512_f32
+#define xsincos nsimd_sleef_sincos_u35_sve512_f64
+#define xsincosf nsimd_sleef_sincos_u35_sve512_f32
+#define xtan nsimd_sleef_tan_u35_sve512_f64
+#define xtanf nsimd_sleef_tan_u35_sve512_f32
+#define xasin nsimd_sleef_asin_u35_sve512_f64
+#define xasinf nsimd_sleef_asin_u35_sve512_f32
+#define xacos nsimd_sleef_acos_u35_sve512_f64
+#define xacosf nsimd_sleef_acos_u35_sve512_f32
+#define xatan nsimd_sleef_atan_u35_sve512_f64
+#define xatanf nsimd_sleef_atan_u35_sve512_f32
+#define xatan2 nsimd_sleef_atan2_u35_sve512_f64
+#define xatan2f nsimd_sleef_atan2_u35_sve512_f32
+#define xlog nsimd_sleef_log_u35_sve512_f64
+#define xlogf nsimd_sleef_log_u35_sve512_f32
+#define xcbrt nsimd_sleef_cbrt_u35_sve512_f64
+#define xcbrtf nsimd_sleef_cbrt_u35_sve512_f32
+#define xsin_u1 nsimd_sleef_sin_u10_sve512_f64
+#define xsinf_u1 nsimd_sleef_sin_u10_sve512_f32
+#define xcos_u1 nsimd_sleef_cos_u10_sve512_f64
+#define xcosf_u1 nsimd_sleef_cos_u10_sve512_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10_sve512_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10_sve512_f32
+#define xtan_u1 nsimd_sleef_tan_u10_sve512_f64
+#define xtanf_u1 nsimd_sleef_tan_u10_sve512_f32
+#define xasin_u1 nsimd_sleef_asin_u10_sve512_f64
+#define xasinf_u1 nsimd_sleef_asin_u10_sve512_f32
+#define xacos_u1 nsimd_sleef_acos_u10_sve512_f64
+#define xacosf_u1 nsimd_sleef_acos_u10_sve512_f32
+#define xatan_u1 nsimd_sleef_atan_u10_sve512_f64
+#define xatanf_u1 nsimd_sleef_atan_u10_sve512_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10_sve512_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10_sve512_f32
+#define xlog_u1 nsimd_sleef_log_u10_sve512_f64
+#define xlogf_u1 nsimd_sleef_log_u10_sve512_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10_sve512_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10_sve512_f32
+#define xexp nsimd_sleef_exp_u10_sve512_f64
+#define xexpf nsimd_sleef_exp_u10_sve512_f32
+#define xpow nsimd_sleef_pow_u10_sve512_f64
+#define xpowf nsimd_sleef_pow_u10_sve512_f32
+#define xsinh nsimd_sleef_sinh_u10_sve512_f64
+#define xsinhf nsimd_sleef_sinh_u10_sve512_f32
+#define xcosh nsimd_sleef_cosh_u10_sve512_f64
+#define xcoshf nsimd_sleef_cosh_u10_sve512_f32
+#define xtanh nsimd_sleef_tanh_u10_sve512_f64
+#define xtanhf nsimd_sleef_tanh_u10_sve512_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35_sve512_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35_sve512_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35_sve512_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35_sve512_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35_sve512_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35_sve512_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500_sve512_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500_sve512_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500_sve512_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500_sve512_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500_sve512_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500_sve512_f32
+#define xasinh nsimd_sleef_asinh_u10_sve512_f64
+#define xasinhf nsimd_sleef_asinh_u10_sve512_f32
+#define xacosh nsimd_sleef_acosh_u10_sve512_f64
+#define xacoshf nsimd_sleef_acosh_u10_sve512_f32
+#define xatanh nsimd_sleef_atanh_u10_sve512_f64
+#define xatanhf nsimd_sleef_atanh_u10_sve512_f32
+#define xexp2 nsimd_sleef_exp2_u10_sve512_f64
+#define xexp2f nsimd_sleef_exp2_u10_sve512_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35_sve512_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35_sve512_f32
+#define xexp10 nsimd_sleef_exp10_u10_sve512_f64
+#define xexp10f nsimd_sleef_exp10_u10_sve512_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35_sve512_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35_sve512_f32
+#define xexpm1 nsimd_sleef_expm1_u10_sve512_f64
+#define xexpm1f nsimd_sleef_expm1_u10_sve512_f32
+#define xlog10 nsimd_sleef_log10_u10_sve512_f64
+#define xlog10f nsimd_sleef_log10_u10_sve512_f32
+#define xlog2 nsimd_sleef_log2_u10_sve512_f64
+#define xlog2f nsimd_sleef_log2_u10_sve512_f32
+#define xlog2_u35 nsimd_sleef_log2_u35_sve512_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35_sve512_f32
+#define xlog1p nsimd_sleef_log1p_u10_sve512_f64
+#define xlog1pf nsimd_sleef_log1p_u10_sve512_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05_sve512_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05_sve512_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35_sve512_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35_sve512_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05_sve512_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05_sve512_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05_sve512_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05_sve512_f32
+#define xldexp nsimd_sleef_ldexp_sve512_f64
+#define xldexpf nsimd_sleef_ldexp_sve512_f32
+#define xilogb nsimd_sleef_ilogb_sve512_f64
+#define xilogbf nsimd_sleef_ilogb_sve512_f32
+#define xfma nsimd_sleef_fma_sve512_f64
+#define xfmaf nsimd_sleef_fma_sve512_f32
+#define xsqrt nsimd_sleef_sqrt_sve512_f64
+#define xsqrtf nsimd_sleef_sqrt_sve512_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05_sve512_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05_sve512_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35_sve512_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35_sve512_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05_sve512_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05_sve512_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35_sve512_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35_sve512_f32
+#define xfabs nsimd_sleef_fabs_sve512_f64
+#define xfabsf nsimd_sleef_fabs_sve512_f32
+#define xcopysign nsimd_sleef_copysign_sve512_f64
+#define xcopysignf nsimd_sleef_copysign_sve512_f32
+#define xfmax nsimd_sleef_fmax_sve512_f64
+#define xfmaxf nsimd_sleef_fmax_sve512_f32
+#define xfmin nsimd_sleef_fmin_sve512_f64
+#define xfminf nsimd_sleef_fmin_sve512_f32
+#define xfdim nsimd_sleef_fdim_sve512_f64
+#define xfdimf nsimd_sleef_fdim_sve512_f32
+#define xtrunc nsimd_sleef_trunc_sve512_f64
+#define xtruncf nsimd_sleef_trunc_sve512_f32
+#define xfloor nsimd_sleef_floor_sve512_f64
+#define xfloorf nsimd_sleef_floor_sve512_f32
+#define xceil nsimd_sleef_ceil_sve512_f64
+#define xceilf nsimd_sleef_ceil_sve512_f32
+#define xround nsimd_sleef_round_sve512_f64
+#define xroundf nsimd_sleef_round_sve512_f32
+#define xrint nsimd_sleef_rint_sve512_f64
+#define xrintf nsimd_sleef_rint_sve512_f32
+#define xnextafter nsimd_sleef_nextafter_sve512_f64
+#define xnextafterf nsimd_sleef_nextafter_sve512_f32
+#define xfrfrexp nsimd_sleef_frfrexp_sve512_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_sve512_f32
+#define xexpfrexp nsimd_sleef_expfrexp_sve512_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_sve512_f32
+#define xfmod nsimd_sleef_fmod_sve512_f64
+#define xfmodf nsimd_sleef_fmod_sve512_f32
+#define xremainder nsimd_sleef_remainder_sve512_f64
+#define xremainderf nsimd_sleef_remainder_sve512_f32
+#define xmodf nsimd_sleef_modf_sve512_f64
+#define xmodff nsimd_sleef_modf_sve512_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10_sve512_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10_sve512_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10_sve512_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10_sve512_f32
+#define xerf_u1 nsimd_sleef_erf_u10_sve512_f64
+#define xerff_u1 nsimd_sleef_erf_u10_sve512_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15_sve512_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15_sve512_f32
+#define xgetInt nsimd_sleef_getInt_sve512_f64
+#define xgetIntf nsimd_sleef_getInt_sve512_f32
+#define xgetPtr nsimd_sleef_getPtr_sve512_f64
+#define xgetPtrf nsimd_sleef_getPtr_sve512_f32
+
+ #endif
+
+ #define rempi nsimd_sleef_rempi_sve512
+ #define rempif nsimd_sleef_rempif_sve512
+ #define rempisub nsimd_sleef_rempisub_sve512
+ #define rempisubf nsimd_sleef_rempisubf_sve512
+ #define gammak nsimd_gammak_sve512
+ #define gammafk nsimd_gammafk_sve512
+
+ #endif
+
+ /* ------------------------------------------------------------------------- */
+ /* Naming of functions sve1024 */
+
+ #ifdef NSIMD_SVE1024
+
+ #ifdef DETERMINISTIC
+
+ #define xsin nsimd_sleef_sin_u35d_sve1024_f64
+#define xsinf nsimd_sleef_sin_u35d_sve1024_f32
+#define xcos nsimd_sleef_cos_u35d_sve1024_f64
+#define xcosf nsimd_sleef_cos_u35d_sve1024_f32
+#define xsincos nsimd_sleef_sincos_u35d_sve1024_f64
+#define xsincosf nsimd_sleef_sincos_u35d_sve1024_f32
+#define xtan nsimd_sleef_tan_u35d_sve1024_f64
+#define xtanf nsimd_sleef_tan_u35d_sve1024_f32
+#define xasin nsimd_sleef_asin_u35d_sve1024_f64
+#define xasinf nsimd_sleef_asin_u35d_sve1024_f32
+#define xacos nsimd_sleef_acos_u35d_sve1024_f64
+#define xacosf nsimd_sleef_acos_u35d_sve1024_f32
+#define xatan nsimd_sleef_atan_u35d_sve1024_f64
+#define xatanf nsimd_sleef_atan_u35d_sve1024_f32
+#define xatan2 nsimd_sleef_atan2_u35d_sve1024_f64
+#define xatan2f nsimd_sleef_atan2_u35d_sve1024_f32
+#define xlog nsimd_sleef_log_u35d_sve1024_f64
+#define xlogf nsimd_sleef_log_u35d_sve1024_f32
+#define xcbrt nsimd_sleef_cbrt_u35d_sve1024_f64
+#define xcbrtf nsimd_sleef_cbrt_u35d_sve1024_f32
+#define xsin_u1 nsimd_sleef_sin_u10d_sve1024_f64
+#define xsinf_u1 nsimd_sleef_sin_u10d_sve1024_f32
+#define xcos_u1 nsimd_sleef_cos_u10d_sve1024_f64
+#define xcosf_u1 nsimd_sleef_cos_u10d_sve1024_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10d_sve1024_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10d_sve1024_f32
+#define xtan_u1 nsimd_sleef_tan_u10d_sve1024_f64
+#define xtanf_u1 nsimd_sleef_tan_u10d_sve1024_f32
+#define xasin_u1 nsimd_sleef_asin_u10d_sve1024_f64
+#define xasinf_u1 nsimd_sleef_asin_u10d_sve1024_f32
+#define xacos_u1 nsimd_sleef_acos_u10d_sve1024_f64
+#define xacosf_u1 nsimd_sleef_acos_u10d_sve1024_f32
+#define xatan_u1 nsimd_sleef_atan_u10d_sve1024_f64
+#define xatanf_u1 nsimd_sleef_atan_u10d_sve1024_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10d_sve1024_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10d_sve1024_f32
+#define xlog_u1 nsimd_sleef_log_u10d_sve1024_f64
+#define xlogf_u1 nsimd_sleef_log_u10d_sve1024_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10d_sve1024_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10d_sve1024_f32
+#define xexp nsimd_sleef_exp_u10d_sve1024_f64
+#define xexpf nsimd_sleef_exp_u10d_sve1024_f32
+#define xpow nsimd_sleef_pow_u10d_sve1024_f64
+#define xpowf nsimd_sleef_pow_u10d_sve1024_f32
+#define xsinh nsimd_sleef_sinh_u10d_sve1024_f64
+#define xsinhf nsimd_sleef_sinh_u10d_sve1024_f32
+#define xcosh nsimd_sleef_cosh_u10d_sve1024_f64
+#define xcoshf nsimd_sleef_cosh_u10d_sve1024_f32
+#define xtanh nsimd_sleef_tanh_u10d_sve1024_f64
+#define xtanhf nsimd_sleef_tanh_u10d_sve1024_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35d_sve1024_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35d_sve1024_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35d_sve1024_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35d_sve1024_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35d_sve1024_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35d_sve1024_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500d_sve1024_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500d_sve1024_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500d_sve1024_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500d_sve1024_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500d_sve1024_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500d_sve1024_f32
+#define xasinh nsimd_sleef_asinh_u10d_sve1024_f64
+#define xasinhf nsimd_sleef_asinh_u10d_sve1024_f32
+#define xacosh nsimd_sleef_acosh_u10d_sve1024_f64
+#define xacoshf nsimd_sleef_acosh_u10d_sve1024_f32
+#define xatanh nsimd_sleef_atanh_u10d_sve1024_f64
+#define xatanhf nsimd_sleef_atanh_u10d_sve1024_f32
+#define xexp2 nsimd_sleef_exp2_u10d_sve1024_f64
+#define xexp2f nsimd_sleef_exp2_u10d_sve1024_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35d_sve1024_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35d_sve1024_f32
+#define xexp10 nsimd_sleef_exp10_u10d_sve1024_f64
+#define xexp10f nsimd_sleef_exp10_u10d_sve1024_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35d_sve1024_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35d_sve1024_f32
+#define xexpm1 nsimd_sleef_expm1_u10d_sve1024_f64
+#define xexpm1f nsimd_sleef_expm1_u10d_sve1024_f32
+#define xlog10 nsimd_sleef_log10_u10d_sve1024_f64
+#define xlog10f nsimd_sleef_log10_u10d_sve1024_f32
+#define xlog2 nsimd_sleef_log2_u10d_sve1024_f64
+#define xlog2f nsimd_sleef_log2_u10d_sve1024_f32
+#define xlog2_u35 nsimd_sleef_log2_u35d_sve1024_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35d_sve1024_f32
+#define xlog1p nsimd_sleef_log1p_u10d_sve1024_f64
+#define xlog1pf nsimd_sleef_log1p_u10d_sve1024_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05d_sve1024_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05d_sve1024_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35d_sve1024_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35d_sve1024_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05d_sve1024_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05d_sve1024_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05d_sve1024_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05d_sve1024_f32
+#define xldexp nsimd_sleef_ldexp_sve1024_f64
+#define xldexpf nsimd_sleef_ldexp_sve1024_f32
+#define xilogb nsimd_sleef_ilogb_sve1024_f64
+#define xilogbf nsimd_sleef_ilogb_sve1024_f32
+#define xfma nsimd_sleef_fma_sve1024_f64
+#define xfmaf nsimd_sleef_fma_sve1024_f32
+#define xsqrt nsimd_sleef_sqrt_sve1024_f64
+#define xsqrtf nsimd_sleef_sqrt_sve1024_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05d_sve1024_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05d_sve1024_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35d_sve1024_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35d_sve1024_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05d_sve1024_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05d_sve1024_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35d_sve1024_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35d_sve1024_f32
+#define xfabs nsimd_sleef_fabs_sve1024_f64
+#define xfabsf nsimd_sleef_fabs_sve1024_f32
+#define xcopysign nsimd_sleef_copysign_sve1024_f64
+#define xcopysignf nsimd_sleef_copysign_sve1024_f32
+#define xfmax nsimd_sleef_fmax_sve1024_f64
+#define xfmaxf nsimd_sleef_fmax_sve1024_f32
+#define xfmin nsimd_sleef_fmin_sve1024_f64
+#define xfminf nsimd_sleef_fmin_sve1024_f32
+#define xfdim nsimd_sleef_fdim_sve1024_f64
+#define xfdimf nsimd_sleef_fdim_sve1024_f32
+#define xtrunc nsimd_sleef_trunc_sve1024_f64
+#define xtruncf nsimd_sleef_trunc_sve1024_f32
+#define xfloor nsimd_sleef_floor_sve1024_f64
+#define xfloorf nsimd_sleef_floor_sve1024_f32
+#define xceil nsimd_sleef_ceil_sve1024_f64
+#define xceilf nsimd_sleef_ceil_sve1024_f32
+#define xround nsimd_sleef_round_sve1024_f64
+#define xroundf nsimd_sleef_round_sve1024_f32
+#define xrint nsimd_sleef_rint_sve1024_f64
+#define xrintf nsimd_sleef_rint_sve1024_f32
+#define xnextafter nsimd_sleef_nextafter_sve1024_f64
+#define xnextafterf nsimd_sleef_nextafter_sve1024_f32
+#define xfrfrexp nsimd_sleef_frfrexp_sve1024_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_sve1024_f32
+#define xexpfrexp nsimd_sleef_expfrexp_sve1024_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_sve1024_f32
+#define xfmod nsimd_sleef_fmod_sve1024_f64
+#define xfmodf nsimd_sleef_fmod_sve1024_f32
+#define xremainder nsimd_sleef_remainder_sve1024_f64
+#define xremainderf nsimd_sleef_remainder_sve1024_f32
+#define xmodf nsimd_sleef_modf_sve1024_f64
+#define xmodff nsimd_sleef_modf_sve1024_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10d_sve1024_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10d_sve1024_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10d_sve1024_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10d_sve1024_f32
+#define xerf_u1 nsimd_sleef_erf_u10d_sve1024_f64
+#define xerff_u1 nsimd_sleef_erf_u10d_sve1024_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15d_sve1024_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15d_sve1024_f32
+#define xgetInt nsimd_sleef_getInt_sve1024_f64
+#define xgetIntf nsimd_sleef_getInt_sve1024_f32
+#define xgetPtr nsimd_sleef_getPtr_sve1024_f64
+#define xgetPtrf nsimd_sleef_getPtr_sve1024_f32
+
+ #else
+
+ #define xsin nsimd_sleef_sin_u35_sve1024_f64
+#define xsinf nsimd_sleef_sin_u35_sve1024_f32
+#define xcos nsimd_sleef_cos_u35_sve1024_f64
+#define xcosf nsimd_sleef_cos_u35_sve1024_f32
+#define xsincos nsimd_sleef_sincos_u35_sve1024_f64
+#define xsincosf nsimd_sleef_sincos_u35_sve1024_f32
+#define xtan nsimd_sleef_tan_u35_sve1024_f64
+#define xtanf nsimd_sleef_tan_u35_sve1024_f32
+#define xasin nsimd_sleef_asin_u35_sve1024_f64
+#define xasinf nsimd_sleef_asin_u35_sve1024_f32
+#define xacos nsimd_sleef_acos_u35_sve1024_f64
+#define xacosf nsimd_sleef_acos_u35_sve1024_f32
+#define xatan nsimd_sleef_atan_u35_sve1024_f64
+#define xatanf nsimd_sleef_atan_u35_sve1024_f32
+#define xatan2 nsimd_sleef_atan2_u35_sve1024_f64
+#define xatan2f nsimd_sleef_atan2_u35_sve1024_f32
+#define xlog nsimd_sleef_log_u35_sve1024_f64
+#define xlogf nsimd_sleef_log_u35_sve1024_f32
+#define xcbrt nsimd_sleef_cbrt_u35_sve1024_f64
+#define xcbrtf nsimd_sleef_cbrt_u35_sve1024_f32
+#define xsin_u1 nsimd_sleef_sin_u10_sve1024_f64
+#define xsinf_u1 nsimd_sleef_sin_u10_sve1024_f32
+#define xcos_u1 nsimd_sleef_cos_u10_sve1024_f64
+#define xcosf_u1 nsimd_sleef_cos_u10_sve1024_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10_sve1024_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10_sve1024_f32
+#define xtan_u1 nsimd_sleef_tan_u10_sve1024_f64
+#define xtanf_u1 nsimd_sleef_tan_u10_sve1024_f32
+#define xasin_u1 nsimd_sleef_asin_u10_sve1024_f64
+#define xasinf_u1 nsimd_sleef_asin_u10_sve1024_f32
+#define xacos_u1 nsimd_sleef_acos_u10_sve1024_f64
+#define xacosf_u1 nsimd_sleef_acos_u10_sve1024_f32
+#define xatan_u1 nsimd_sleef_atan_u10_sve1024_f64
+#define xatanf_u1 nsimd_sleef_atan_u10_sve1024_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10_sve1024_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10_sve1024_f32
+#define xlog_u1 nsimd_sleef_log_u10_sve1024_f64
+#define xlogf_u1 nsimd_sleef_log_u10_sve1024_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10_sve1024_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10_sve1024_f32
+#define xexp nsimd_sleef_exp_u10_sve1024_f64
+#define xexpf nsimd_sleef_exp_u10_sve1024_f32
+#define xpow nsimd_sleef_pow_u10_sve1024_f64
+#define xpowf nsimd_sleef_pow_u10_sve1024_f32
+#define xsinh nsimd_sleef_sinh_u10_sve1024_f64
+#define xsinhf nsimd_sleef_sinh_u10_sve1024_f32
+#define xcosh nsimd_sleef_cosh_u10_sve1024_f64
+#define xcoshf nsimd_sleef_cosh_u10_sve1024_f32
+#define xtanh nsimd_sleef_tanh_u10_sve1024_f64
+#define xtanhf nsimd_sleef_tanh_u10_sve1024_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35_sve1024_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35_sve1024_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35_sve1024_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35_sve1024_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35_sve1024_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35_sve1024_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500_sve1024_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500_sve1024_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500_sve1024_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500_sve1024_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500_sve1024_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500_sve1024_f32
+#define xasinh nsimd_sleef_asinh_u10_sve1024_f64
+#define xasinhf nsimd_sleef_asinh_u10_sve1024_f32
+#define xacosh nsimd_sleef_acosh_u10_sve1024_f64
+#define xacoshf nsimd_sleef_acosh_u10_sve1024_f32
+#define xatanh nsimd_sleef_atanh_u10_sve1024_f64
+#define xatanhf nsimd_sleef_atanh_u10_sve1024_f32
+#define xexp2 nsimd_sleef_exp2_u10_sve1024_f64
+#define xexp2f nsimd_sleef_exp2_u10_sve1024_f32
+#define xexp2_u35 nsimd_sleef_exp2_u35_sve1024_f64
+#define xexp2f_u35 nsimd_sleef_exp2_u35_sve1024_f32
+#define xexp10 nsimd_sleef_exp10_u10_sve1024_f64
+#define xexp10f nsimd_sleef_exp10_u10_sve1024_f32
+#define xexp10_u35 nsimd_sleef_exp10_u35_sve1024_f64
+#define xexp10f_u35 nsimd_sleef_exp10_u35_sve1024_f32
+#define xexpm1 nsimd_sleef_expm1_u10_sve1024_f64
+#define xexpm1f nsimd_sleef_expm1_u10_sve1024_f32
+#define xlog10 nsimd_sleef_log10_u10_sve1024_f64
+#define xlog10f nsimd_sleef_log10_u10_sve1024_f32
+#define xlog2 nsimd_sleef_log2_u10_sve1024_f64
+#define xlog2f nsimd_sleef_log2_u10_sve1024_f32
+#define xlog2_u35 nsimd_sleef_log2_u35_sve1024_f64
+#define xlog2f_u35 nsimd_sleef_log2_u35_sve1024_f32
+#define xlog1p nsimd_sleef_log1p_u10_sve1024_f64
+#define xlog1pf nsimd_sleef_log1p_u10_sve1024_f32
+#define xsincospi_u05 nsimd_sleef_sincospi_u05_sve1024_f64
+#define xsincospif_u05 nsimd_sleef_sincospi_u05_sve1024_f32
+#define xsincospi_u35 nsimd_sleef_sincospi_u35_sve1024_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35_sve1024_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05_sve1024_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05_sve1024_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05_sve1024_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05_sve1024_f32
+#define xldexp nsimd_sleef_ldexp_sve1024_f64
+#define xldexpf nsimd_sleef_ldexp_sve1024_f32
+#define xilogb nsimd_sleef_ilogb_sve1024_f64
+#define xilogbf nsimd_sleef_ilogb_sve1024_f32
+#define xfma nsimd_sleef_fma_sve1024_f64
+#define xfmaf nsimd_sleef_fma_sve1024_f32
+#define xsqrt nsimd_sleef_sqrt_sve1024_f64
+#define xsqrtf nsimd_sleef_sqrt_sve1024_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05_sve1024_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05_sve1024_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35_sve1024_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35_sve1024_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05_sve1024_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05_sve1024_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35_sve1024_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35_sve1024_f32
+#define xfabs nsimd_sleef_fabs_sve1024_f64
+#define xfabsf nsimd_sleef_fabs_sve1024_f32
+#define xcopysign nsimd_sleef_copysign_sve1024_f64
+#define xcopysignf nsimd_sleef_copysign_sve1024_f32
+#define xfmax nsimd_sleef_fmax_sve1024_f64
+#define xfmaxf nsimd_sleef_fmax_sve1024_f32
+#define xfmin nsimd_sleef_fmin_sve1024_f64
+#define xfminf nsimd_sleef_fmin_sve1024_f32
+#define xfdim nsimd_sleef_fdim_sve1024_f64
+#define xfdimf nsimd_sleef_fdim_sve1024_f32
+#define xtrunc nsimd_sleef_trunc_sve1024_f64
+#define xtruncf nsimd_sleef_trunc_sve1024_f32
+#define xfloor nsimd_sleef_floor_sve1024_f64
+#define xfloorf nsimd_sleef_floor_sve1024_f32
+#define xceil nsimd_sleef_ceil_sve1024_f64
+#define xceilf nsimd_sleef_ceil_sve1024_f32
+#define xround nsimd_sleef_round_sve1024_f64
+#define xroundf nsimd_sleef_round_sve1024_f32
+#define xrint
nsimd_sleef_rint_sve1024_f64 +#define xrintf nsimd_sleef_rint_sve1024_f32 +#define xnextafter nsimd_sleef_nextafter_sve1024_f64 +#define xnextafterf nsimd_sleef_nextafter_sve1024_f32 +#define xfrfrexp nsimd_sleef_frfrexp_sve1024_f64 +#define xfrfrexpf nsimd_sleef_frfrexp_sve1024_f32 +#define xexpfrexp nsimd_sleef_expfrexp_sve1024_f64 +#define xexpfrexpf nsimd_sleef_expfrexp_sve1024_f32 +#define xfmod nsimd_sleef_fmod_sve1024_f64 +#define xfmodf nsimd_sleef_fmod_sve1024_f32 +#define xremainder nsimd_sleef_remainder_sve1024_f64 +#define xremainderf nsimd_sleef_remainder_sve1024_f32 +#define xmodf nsimd_sleef_modf_sve1024_f64 +#define xmodff nsimd_sleef_modf_sve1024_f32 +#define xlgamma_u1 nsimd_sleef_lgamma_u10_sve1024_f64 +#define xlgammaf_u1 nsimd_sleef_lgamma_u10_sve1024_f32 +#define xtgamma_u1 nsimd_sleef_tgamma_u10_sve1024_f64 +#define xtgammaf_u1 nsimd_sleef_tgamma_u10_sve1024_f32 +#define xerf_u1 nsimd_sleef_erf_u10_sve1024_f64 +#define xerff_u1 nsimd_sleef_erf_u10_sve1024_f32 +#define xerfc_u15 nsimd_sleef_erfc_u15_sve1024_f64 +#define xerfcf_u15 nsimd_sleef_erfc_u15_sve1024_f32 +#define xgetInt nsimd_sleef_getInt_sve1024_f64 +#define xgetIntf nsimd_sleef_getInt_sve1024_f32 +#define xgetPtr nsimd_sleef_getPtr_sve1024_f64 +#define xgetPtrf nsimd_sleef_getPtr_sve1024_f32 + + #endif + + #define rempi nsimd_sleef_rempi_sve1024 + #define rempif nsimd_sleef_rempif_sve1024 + #define rempisub nsimd_sleef_rempisub_sve1024 + #define rempisubf nsimd_sleef_rempisubf_sve1024 + #define gammak nsimd_gammak_sve1024 + #define gammafk nsimd_gammafk_sve1024 + + #endif + + /* ------------------------------------------------------------------------- */ + /* Naming of functions sve2048 */ + + #ifdef NSIMD_SVE2048 + + #ifdef DETERMINISTIC + + #define xsin nsimd_sleef_sin_u35d_sve2048_f64 +#define xsinf nsimd_sleef_sin_u35d_sve2048_f32 +#define xcos nsimd_sleef_cos_u35d_sve2048_f64 +#define xcosf nsimd_sleef_cos_u35d_sve2048_f32 +#define xsincos nsimd_sleef_sincos_u35d_sve2048_f64 +#define xsincosf nsimd_sleef_sincos_u35d_sve2048_f32 +#define xtan nsimd_sleef_tan_u35d_sve2048_f64 +#define xtanf nsimd_sleef_tan_u35d_sve2048_f32 +#define xasin nsimd_sleef_asin_u35d_sve2048_f64 +#define xasinf nsimd_sleef_asin_u35d_sve2048_f32 +#define xacos nsimd_sleef_acos_u35d_sve2048_f64 +#define xacosf nsimd_sleef_acos_u35d_sve2048_f32 +#define xatan nsimd_sleef_atan_u35d_sve2048_f64 +#define xatanf nsimd_sleef_atan_u35d_sve2048_f32 +#define xatan2 nsimd_sleef_atan2_u35d_sve2048_f64 +#define xatan2f nsimd_sleef_atan2_u35d_sve2048_f32 +#define xlog nsimd_sleef_log_u35d_sve2048_f64 +#define xlogf nsimd_sleef_log_u35d_sve2048_f32 +#define xcbrt nsimd_sleef_cbrt_u35d_sve2048_f64 +#define xcbrtf nsimd_sleef_cbrt_u35d_sve2048_f32 +#define xsin_u1 nsimd_sleef_sin_u10d_sve2048_f64 +#define xsinf_u1 nsimd_sleef_sin_u10d_sve2048_f32 +#define xcos_u1 nsimd_sleef_cos_u10d_sve2048_f64 +#define xcosf_u1 nsimd_sleef_cos_u10d_sve2048_f32 +#define xsincos_u1 nsimd_sleef_sincos_u10d_sve2048_f64 +#define xsincosf_u1 nsimd_sleef_sincos_u10d_sve2048_f32 +#define xtan_u1 nsimd_sleef_tan_u10d_sve2048_f64 +#define xtanf_u1 nsimd_sleef_tan_u10d_sve2048_f32 +#define xasin_u1 nsimd_sleef_asin_u10d_sve2048_f64 +#define xasinf_u1 nsimd_sleef_asin_u10d_sve2048_f32 +#define xacos_u1 nsimd_sleef_acos_u10d_sve2048_f64 +#define xacosf_u1 nsimd_sleef_acos_u10d_sve2048_f32 +#define xatan_u1 nsimd_sleef_atan_u10d_sve2048_f64 +#define xatanf_u1 nsimd_sleef_atan_u10d_sve2048_f32 +#define xatan2_u1 nsimd_sleef_atan2_u10d_sve2048_f64 +#define xatan2f_u1 
nsimd_sleef_atan2_u10d_sve2048_f32 +#define xlog_u1 nsimd_sleef_log_u10d_sve2048_f64 +#define xlogf_u1 nsimd_sleef_log_u10d_sve2048_f32 +#define xcbrt_u1 nsimd_sleef_cbrt_u10d_sve2048_f64 +#define xcbrtf_u1 nsimd_sleef_cbrt_u10d_sve2048_f32 +#define xexp nsimd_sleef_exp_u10d_sve2048_f64 +#define xexpf nsimd_sleef_exp_u10d_sve2048_f32 +#define xpow nsimd_sleef_pow_u10d_sve2048_f64 +#define xpowf nsimd_sleef_pow_u10d_sve2048_f32 +#define xsinh nsimd_sleef_sinh_u10d_sve2048_f64 +#define xsinhf nsimd_sleef_sinh_u10d_sve2048_f32 +#define xcosh nsimd_sleef_cosh_u10d_sve2048_f64 +#define xcoshf nsimd_sleef_cosh_u10d_sve2048_f32 +#define xtanh nsimd_sleef_tanh_u10d_sve2048_f64 +#define xtanhf nsimd_sleef_tanh_u10d_sve2048_f32 +#define xsinh_u35 nsimd_sleef_sinh_u35d_sve2048_f64 +#define xsinhf_u35 nsimd_sleef_sinh_u35d_sve2048_f32 +#define xcosh_u35 nsimd_sleef_cosh_u35d_sve2048_f64 +#define xcoshf_u35 nsimd_sleef_cosh_u35d_sve2048_f32 +#define xtanh_u35 nsimd_sleef_tanh_u35d_sve2048_f64 +#define xtanhf_u35 nsimd_sleef_tanh_u35d_sve2048_f32 +#define xfastsin_u3500 nsimd_sleef_fastsin_u3500d_sve2048_f64 +#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500d_sve2048_f32 +#define xfastcos_u3500 nsimd_sleef_fastcos_u3500d_sve2048_f64 +#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500d_sve2048_f32 +#define xfastpow_u3500 nsimd_sleef_fastpow_u3500d_sve2048_f64 +#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500d_sve2048_f32 +#define xasinh nsimd_sleef_asinh_u10d_sve2048_f64 +#define xasinhf nsimd_sleef_asinh_u10d_sve2048_f32 +#define xacosh nsimd_sleef_acosh_u10d_sve2048_f64 +#define xacoshf nsimd_sleef_acosh_u10d_sve2048_f32 +#define xatanh nsimd_sleef_atanh_u10d_sve2048_f64 +#define xatanhf nsimd_sleef_atanh_u10d_sve2048_f32 +#define xexp2 nsimd_sleef_exp2_u10d_sve2048_f64 +#define xexp2f nsimd_sleef_exp2_u10d_sve2048_f32 +#define xexp2_u35 nsimd_sleef_exp2_u35d_sve2048_f64 +#define xexp2f_u35 nsimd_sleef_exp2_u35d_sve2048_f32 +#define xexp10 nsimd_sleef_exp10_u10d_sve2048_f64 +#define xexp10f nsimd_sleef_exp10_u10d_sve2048_f32 +#define xexp10_u35 nsimd_sleef_exp10_u35d_sve2048_f64 +#define xexp10f_u35 nsimd_sleef_exp10_u35d_sve2048_f32 +#define xexpm1 nsimd_sleef_expm1_u10d_sve2048_f64 +#define xexpm1f nsimd_sleef_expm1_u10d_sve2048_f32 +#define xlog10 nsimd_sleef_log10_u10d_sve2048_f64 +#define xlog10f nsimd_sleef_log10_u10d_sve2048_f32 +#define xlog2 nsimd_sleef_log2_u10d_sve2048_f64 +#define xlog2f nsimd_sleef_log2_u10d_sve2048_f32 +#define xlog2_u35 nsimd_sleef_log2_u35d_sve2048_f64 +#define xlog2f_u35 nsimd_sleef_log2_u35d_sve2048_f32 +#define xlog1p nsimd_sleef_log1p_u10d_sve2048_f64 +#define xlog1pf nsimd_sleef_log1p_u10d_sve2048_f32 +#define xsincospi_u05 nsimd_sleef_sincospi_u05d_sve2048_f64 +#define xsincospif_u05 nsimd_sleef_sincospi_u05d_sve2048_f32 +#define xsincospi_u35 nsimd_sleef_sincospi_u35d_sve2048_f64 +#define xsincospif_u35 nsimd_sleef_sincospi_u35d_sve2048_f32 +#define xsinpi_u05 nsimd_sleef_sinpi_u05d_sve2048_f64 +#define xsinpif_u05 nsimd_sleef_sinpi_u05d_sve2048_f32 +#define xcospi_u05 nsimd_sleef_cospi_u05d_sve2048_f64 +#define xcospif_u05 nsimd_sleef_cospi_u05d_sve2048_f32 +#define xldexp nsimd_sleef_ldexp_sve2048_f64 +#define xldexpf nsimd_sleef_ldexp_sve2048_f32 +#define xilogb nsimd_sleef_ilogb_sve2048_f64 +#define xilogbf nsimd_sleef_ilogb_sve2048_f32 +#define xfma nsimd_sleef_fma_sve2048_f64 +#define xfmaf nsimd_sleef_fma_sve2048_f32 +#define xsqrt nsimd_sleef_sqrt_sve2048_f64 +#define xsqrtf nsimd_sleef_sqrt_sve2048_f32 +#define xsqrt_u05 nsimd_sleef_sqrt_u05d_sve2048_f64 
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05d_sve2048_f32 +#define xsqrt_u35 nsimd_sleef_sqrt_u35d_sve2048_f64 +#define xsqrtf_u35 nsimd_sleef_sqrt_u35d_sve2048_f32 +#define xhypot_u05 nsimd_sleef_hypot_u05d_sve2048_f64 +#define xhypotf_u05 nsimd_sleef_hypot_u05d_sve2048_f32 +#define xhypot_u35 nsimd_sleef_hypot_u35d_sve2048_f64 +#define xhypotf_u35 nsimd_sleef_hypot_u35d_sve2048_f32 +#define xfabs nsimd_sleef_fabs_sve2048_f64 +#define xfabsf nsimd_sleef_fabs_sve2048_f32 +#define xcopysign nsimd_sleef_copysign_sve2048_f64 +#define xcopysignf nsimd_sleef_copysign_sve2048_f32 +#define xfmax nsimd_sleef_fmax_sve2048_f64 +#define xfmaxf nsimd_sleef_fmax_sve2048_f32 +#define xfmin nsimd_sleef_fmin_sve2048_f64 +#define xfminf nsimd_sleef_fmin_sve2048_f32 +#define xfdim nsimd_sleef_fdim_sve2048_f64 +#define xfdimf nsimd_sleef_fdim_sve2048_f32 +#define xtrunc nsimd_sleef_trunc_sve2048_f64 +#define xtruncf nsimd_sleef_trunc_sve2048_f32 +#define xfloor nsimd_sleef_floor_sve2048_f64 +#define xfloorf nsimd_sleef_floor_sve2048_f32 +#define xceil nsimd_sleef_ceil_sve2048_f64 +#define xceilf nsimd_sleef_ceil_sve2048_f32 +#define xround nsimd_sleef_round_sve2048_f64 +#define xroundf nsimd_sleef_round_sve2048_f32 +#define xrint nsimd_sleef_rint_sve2048_f64 +#define xrintf nsimd_sleef_rint_sve2048_f32 +#define xnextafter nsimd_sleef_nextafter_sve2048_f64 +#define xnextafterf nsimd_sleef_nextafter_sve2048_f32 +#define xfrfrexp nsimd_sleef_frfrexp_sve2048_f64 +#define xfrfrexpf nsimd_sleef_frfrexp_sve2048_f32 +#define xexpfrexp nsimd_sleef_expfrexp_sve2048_f64 +#define xexpfrexpf nsimd_sleef_expfrexp_sve2048_f32 +#define xfmod nsimd_sleef_fmod_sve2048_f64 +#define xfmodf nsimd_sleef_fmod_sve2048_f32 +#define xremainder nsimd_sleef_remainder_sve2048_f64 +#define xremainderf nsimd_sleef_remainder_sve2048_f32 +#define xmodf nsimd_sleef_modf_sve2048_f64 +#define xmodff nsimd_sleef_modf_sve2048_f32 +#define xlgamma_u1 nsimd_sleef_lgamma_u10d_sve2048_f64 +#define xlgammaf_u1 nsimd_sleef_lgamma_u10d_sve2048_f32 +#define xtgamma_u1 nsimd_sleef_tgamma_u10d_sve2048_f64 +#define xtgammaf_u1 nsimd_sleef_tgamma_u10d_sve2048_f32 +#define xerf_u1 nsimd_sleef_erf_u10d_sve2048_f64 +#define xerff_u1 nsimd_sleef_erf_u10d_sve2048_f32 +#define xerfc_u15 nsimd_sleef_erfc_u15d_sve2048_f64 +#define xerfcf_u15 nsimd_sleef_erfc_u15d_sve2048_f32 +#define xgetInt nsimd_sleef_getInt_sve2048_f64 +#define xgetIntf nsimd_sleef_getInt_sve2048_f32 +#define xgetPtr nsimd_sleef_getPtr_sve2048_f64 +#define xgetPtrf nsimd_sleef_getPtr_sve2048_f32 + + #else + + #define xsin nsimd_sleef_sin_u35_sve2048_f64 +#define xsinf nsimd_sleef_sin_u35_sve2048_f32 +#define xcos nsimd_sleef_cos_u35_sve2048_f64 +#define xcosf nsimd_sleef_cos_u35_sve2048_f32 +#define xsincos nsimd_sleef_sincos_u35_sve2048_f64 +#define xsincosf nsimd_sleef_sincos_u35_sve2048_f32 +#define xtan nsimd_sleef_tan_u35_sve2048_f64 +#define xtanf nsimd_sleef_tan_u35_sve2048_f32 +#define xasin nsimd_sleef_asin_u35_sve2048_f64 +#define xasinf nsimd_sleef_asin_u35_sve2048_f32 +#define xacos nsimd_sleef_acos_u35_sve2048_f64 +#define xacosf nsimd_sleef_acos_u35_sve2048_f32 +#define xatan nsimd_sleef_atan_u35_sve2048_f64 +#define xatanf nsimd_sleef_atan_u35_sve2048_f32 +#define xatan2 nsimd_sleef_atan2_u35_sve2048_f64 +#define xatan2f nsimd_sleef_atan2_u35_sve2048_f32 +#define xlog nsimd_sleef_log_u35_sve2048_f64 +#define xlogf nsimd_sleef_log_u35_sve2048_f32 +#define xcbrt nsimd_sleef_cbrt_u35_sve2048_f64 +#define xcbrtf nsimd_sleef_cbrt_u35_sve2048_f32 +#define xsin_u1 
nsimd_sleef_sin_u10_sve2048_f64 +#define xsinf_u1 nsimd_sleef_sin_u10_sve2048_f32 +#define xcos_u1 nsimd_sleef_cos_u10_sve2048_f64 +#define xcosf_u1 nsimd_sleef_cos_u10_sve2048_f32 +#define xsincos_u1 nsimd_sleef_sincos_u10_sve2048_f64 +#define xsincosf_u1 nsimd_sleef_sincos_u10_sve2048_f32 +#define xtan_u1 nsimd_sleef_tan_u10_sve2048_f64 +#define xtanf_u1 nsimd_sleef_tan_u10_sve2048_f32 +#define xasin_u1 nsimd_sleef_asin_u10_sve2048_f64 +#define xasinf_u1 nsimd_sleef_asin_u10_sve2048_f32 +#define xacos_u1 nsimd_sleef_acos_u10_sve2048_f64 +#define xacosf_u1 nsimd_sleef_acos_u10_sve2048_f32 +#define xatan_u1 nsimd_sleef_atan_u10_sve2048_f64 +#define xatanf_u1 nsimd_sleef_atan_u10_sve2048_f32 +#define xatan2_u1 nsimd_sleef_atan2_u10_sve2048_f64 +#define xatan2f_u1 nsimd_sleef_atan2_u10_sve2048_f32 +#define xlog_u1 nsimd_sleef_log_u10_sve2048_f64 +#define xlogf_u1 nsimd_sleef_log_u10_sve2048_f32 +#define xcbrt_u1 nsimd_sleef_cbrt_u10_sve2048_f64 +#define xcbrtf_u1 nsimd_sleef_cbrt_u10_sve2048_f32 +#define xexp nsimd_sleef_exp_u10_sve2048_f64 +#define xexpf nsimd_sleef_exp_u10_sve2048_f32 +#define xpow nsimd_sleef_pow_u10_sve2048_f64 +#define xpowf nsimd_sleef_pow_u10_sve2048_f32 +#define xsinh nsimd_sleef_sinh_u10_sve2048_f64 +#define xsinhf nsimd_sleef_sinh_u10_sve2048_f32 +#define xcosh nsimd_sleef_cosh_u10_sve2048_f64 +#define xcoshf nsimd_sleef_cosh_u10_sve2048_f32 +#define xtanh nsimd_sleef_tanh_u10_sve2048_f64 +#define xtanhf nsimd_sleef_tanh_u10_sve2048_f32 +#define xsinh_u35 nsimd_sleef_sinh_u35_sve2048_f64 +#define xsinhf_u35 nsimd_sleef_sinh_u35_sve2048_f32 +#define xcosh_u35 nsimd_sleef_cosh_u35_sve2048_f64 +#define xcoshf_u35 nsimd_sleef_cosh_u35_sve2048_f32 +#define xtanh_u35 nsimd_sleef_tanh_u35_sve2048_f64 +#define xtanhf_u35 nsimd_sleef_tanh_u35_sve2048_f32 +#define xfastsin_u3500 nsimd_sleef_fastsin_u3500_sve2048_f64 +#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500_sve2048_f32 +#define xfastcos_u3500 nsimd_sleef_fastcos_u3500_sve2048_f64 +#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500_sve2048_f32 +#define xfastpow_u3500 nsimd_sleef_fastpow_u3500_sve2048_f64 +#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500_sve2048_f32 +#define xasinh nsimd_sleef_asinh_u10_sve2048_f64 +#define xasinhf nsimd_sleef_asinh_u10_sve2048_f32 +#define xacosh nsimd_sleef_acosh_u10_sve2048_f64 +#define xacoshf nsimd_sleef_acosh_u10_sve2048_f32 +#define xatanh nsimd_sleef_atanh_u10_sve2048_f64 +#define xatanhf nsimd_sleef_atanh_u10_sve2048_f32 +#define xexp2 nsimd_sleef_exp2_u10_sve2048_f64 +#define xexp2f nsimd_sleef_exp2_u10_sve2048_f32 +#define xexp2_u35 nsimd_sleef_exp2_u35_sve2048_f64 +#define xexp2f_u35 nsimd_sleef_exp2_u35_sve2048_f32 +#define xexp10 nsimd_sleef_exp10_u10_sve2048_f64 +#define xexp10f nsimd_sleef_exp10_u10_sve2048_f32 +#define xexp10_u35 nsimd_sleef_exp10_u35_sve2048_f64 +#define xexp10f_u35 nsimd_sleef_exp10_u35_sve2048_f32 +#define xexpm1 nsimd_sleef_expm1_u10_sve2048_f64 +#define xexpm1f nsimd_sleef_expm1_u10_sve2048_f32 +#define xlog10 nsimd_sleef_log10_u10_sve2048_f64 +#define xlog10f nsimd_sleef_log10_u10_sve2048_f32 +#define xlog2 nsimd_sleef_log2_u10_sve2048_f64 +#define xlog2f nsimd_sleef_log2_u10_sve2048_f32 +#define xlog2_u35 nsimd_sleef_log2_u35_sve2048_f64 +#define xlog2f_u35 nsimd_sleef_log2_u35_sve2048_f32 +#define xlog1p nsimd_sleef_log1p_u10_sve2048_f64 +#define xlog1pf nsimd_sleef_log1p_u10_sve2048_f32 +#define xsincospi_u05 nsimd_sleef_sincospi_u05_sve2048_f64 +#define xsincospif_u05 nsimd_sleef_sincospi_u05_sve2048_f32 +#define xsincospi_u35 
nsimd_sleef_sincospi_u35_sve2048_f64
+#define xsincospif_u35 nsimd_sleef_sincospi_u35_sve2048_f32
+#define xsinpi_u05 nsimd_sleef_sinpi_u05_sve2048_f64
+#define xsinpif_u05 nsimd_sleef_sinpi_u05_sve2048_f32
+#define xcospi_u05 nsimd_sleef_cospi_u05_sve2048_f64
+#define xcospif_u05 nsimd_sleef_cospi_u05_sve2048_f32
+#define xldexp nsimd_sleef_ldexp_sve2048_f64
+#define xldexpf nsimd_sleef_ldexp_sve2048_f32
+#define xilogb nsimd_sleef_ilogb_sve2048_f64
+#define xilogbf nsimd_sleef_ilogb_sve2048_f32
+#define xfma nsimd_sleef_fma_sve2048_f64
+#define xfmaf nsimd_sleef_fma_sve2048_f32
+#define xsqrt nsimd_sleef_sqrt_sve2048_f64
+#define xsqrtf nsimd_sleef_sqrt_sve2048_f32
+#define xsqrt_u05 nsimd_sleef_sqrt_u05_sve2048_f64
+#define xsqrtf_u05 nsimd_sleef_sqrt_u05_sve2048_f32
+#define xsqrt_u35 nsimd_sleef_sqrt_u35_sve2048_f64
+#define xsqrtf_u35 nsimd_sleef_sqrt_u35_sve2048_f32
+#define xhypot_u05 nsimd_sleef_hypot_u05_sve2048_f64
+#define xhypotf_u05 nsimd_sleef_hypot_u05_sve2048_f32
+#define xhypot_u35 nsimd_sleef_hypot_u35_sve2048_f64
+#define xhypotf_u35 nsimd_sleef_hypot_u35_sve2048_f32
+#define xfabs nsimd_sleef_fabs_sve2048_f64
+#define xfabsf nsimd_sleef_fabs_sve2048_f32
+#define xcopysign nsimd_sleef_copysign_sve2048_f64
+#define xcopysignf nsimd_sleef_copysign_sve2048_f32
+#define xfmax nsimd_sleef_fmax_sve2048_f64
+#define xfmaxf nsimd_sleef_fmax_sve2048_f32
+#define xfmin nsimd_sleef_fmin_sve2048_f64
+#define xfminf nsimd_sleef_fmin_sve2048_f32
+#define xfdim nsimd_sleef_fdim_sve2048_f64
+#define xfdimf nsimd_sleef_fdim_sve2048_f32
+#define xtrunc nsimd_sleef_trunc_sve2048_f64
+#define xtruncf nsimd_sleef_trunc_sve2048_f32
+#define xfloor nsimd_sleef_floor_sve2048_f64
+#define xfloorf nsimd_sleef_floor_sve2048_f32
+#define xceil nsimd_sleef_ceil_sve2048_f64
+#define xceilf nsimd_sleef_ceil_sve2048_f32
+#define xround nsimd_sleef_round_sve2048_f64
+#define xroundf nsimd_sleef_round_sve2048_f32
+#define xrint nsimd_sleef_rint_sve2048_f64
+#define xrintf nsimd_sleef_rint_sve2048_f32
+#define xnextafter nsimd_sleef_nextafter_sve2048_f64
+#define xnextafterf nsimd_sleef_nextafter_sve2048_f32
+#define xfrfrexp nsimd_sleef_frfrexp_sve2048_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_sve2048_f32
+#define xexpfrexp nsimd_sleef_expfrexp_sve2048_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_sve2048_f32
+#define xfmod nsimd_sleef_fmod_sve2048_f64
+#define xfmodf nsimd_sleef_fmod_sve2048_f32
+#define xremainder nsimd_sleef_remainder_sve2048_f64
+#define xremainderf nsimd_sleef_remainder_sve2048_f32
+#define xmodf nsimd_sleef_modf_sve2048_f64
+#define xmodff nsimd_sleef_modf_sve2048_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10_sve2048_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10_sve2048_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10_sve2048_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10_sve2048_f32
+#define xerf_u1 nsimd_sleef_erf_u10_sve2048_f64
+#define xerff_u1 nsimd_sleef_erf_u10_sve2048_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15_sve2048_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15_sve2048_f32
+#define xgetInt nsimd_sleef_getInt_sve2048_f64
+#define xgetIntf nsimd_sleef_getInt_sve2048_f32
+#define xgetPtr nsimd_sleef_getPtr_sve2048_f64
+#define xgetPtrf nsimd_sleef_getPtr_sve2048_f32
+
+ #endif
+
+ #define rempi nsimd_sleef_rempi_sve2048
+ #define rempif nsimd_sleef_rempif_sve2048
+ #define rempisub nsimd_sleef_rempisub_sve2048
+ #define rempisubf nsimd_sleef_rempisubf_sve2048
+ #define gammak nsimd_gammak_sve2048
+ #define gammafk nsimd_gammafk_sve2048
+
+ #endif
+
+
+
+#endif
+
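All of these generated rename headers follow one scheme: SLEEF's generic sources call the math routines through short aliases (xsinf, xlog_u1, xsqrt_u05, ...), and each header remaps those aliases onto NSIMD-namespaced symbols of the form nsimd_sleef_<op>_<accuracy>[d]_<simd>_<type>. The accuracy token encodes the target maximum error in tenths of an ULP (u05 = 0.5 ULP, u10 = 1.0 ULP, u35 = 3.5 ULP, u3500 for the fast low-accuracy variants), and the trailing d, selected by the DETERMINISTIC macro, picks the bitwise-reproducible implementations. Inside the library the mapping is pulled in through rename.h when DORENAME is defined. As a minimal sketch of how the mapping resolves, here is a hypothetical translation unit, not part of this patch; the vector type and the extern prototype are illustrative assumptions:

    /* example.c -- hypothetical; compile with VSX enabled, e.g. -mvsx,
       and link against the library providing the nsimd_sleef_* symbols */
    #define NSIMD_VSX              /* select the VSX section of the header */
    #include <altivec.h>
    #include "renamevsx.h"         /* without DETERMINISTIC:
                                      xsinf -> nsimd_sleef_sin_u35_vsx_f32 */

    extern __vector float nsimd_sleef_sin_u35_vsx_f32(__vector float);

    __vector float sin_u35(__vector float x) {
      return xsinf(x);   /* expands to nsimd_sleef_sin_u35_vsx_f32(x) */
    }

With DETERMINISTIC defined, the same xsinf alias would resolve to nsimd_sleef_sin_u35d_vsx_f32 instead. The renamevsx.h header added below provides exactly these mappings for the POWER VMX and VSX extensions.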
diff --git a/src/renamevsx.h b/src/renamevsx.h
new file mode 100644
index 00000000..dd1533c7
--- /dev/null
+++ b/src/renamevsx.h
@@ -0,0 +1,667 @@
+#ifndef RENAMEVSX_H
+ #define RENAMEVSX_H
+
+ /* ------------------------------------------------------------------------- */
+ /* Naming of functions vmx */
+
+ #ifdef NSIMD_VMX
+
+ #ifdef DETERMINISTIC
+
+ #define xsin nsimd_sleef_sin_u35d_vmx_f64
+#define xsinf nsimd_sleef_sin_u35d_vmx_f32
+#define xcos nsimd_sleef_cos_u35d_vmx_f64
+#define xcosf nsimd_sleef_cos_u35d_vmx_f32
+#define xsincos nsimd_sleef_sincos_u35d_vmx_f64
+#define xsincosf nsimd_sleef_sincos_u35d_vmx_f32
+#define xtan nsimd_sleef_tan_u35d_vmx_f64
+#define xtanf nsimd_sleef_tan_u35d_vmx_f32
+#define xasin nsimd_sleef_asin_u35d_vmx_f64
+#define xasinf nsimd_sleef_asin_u35d_vmx_f32
+#define xacos nsimd_sleef_acos_u35d_vmx_f64
+#define xacosf nsimd_sleef_acos_u35d_vmx_f32
+#define xatan nsimd_sleef_atan_u35d_vmx_f64
+#define xatanf nsimd_sleef_atan_u35d_vmx_f32
+#define xatan2 nsimd_sleef_atan2_u35d_vmx_f64
+#define xatan2f nsimd_sleef_atan2_u35d_vmx_f32
+#define xlog nsimd_sleef_log_u35d_vmx_f64
+#define xlogf nsimd_sleef_log_u35d_vmx_f32
+#define xcbrt nsimd_sleef_cbrt_u35d_vmx_f64
+#define xcbrtf nsimd_sleef_cbrt_u35d_vmx_f32
+#define xsin_u1 nsimd_sleef_sin_u10d_vmx_f64
+#define xsinf_u1 nsimd_sleef_sin_u10d_vmx_f32
+#define xcos_u1 nsimd_sleef_cos_u10d_vmx_f64
+#define xcosf_u1 nsimd_sleef_cos_u10d_vmx_f32
+#define xsincos_u1 nsimd_sleef_sincos_u10d_vmx_f64
+#define xsincosf_u1 nsimd_sleef_sincos_u10d_vmx_f32
+#define xtan_u1 nsimd_sleef_tan_u10d_vmx_f64
+#define xtanf_u1 nsimd_sleef_tan_u10d_vmx_f32
+#define xasin_u1 nsimd_sleef_asin_u10d_vmx_f64
+#define xasinf_u1 nsimd_sleef_asin_u10d_vmx_f32
+#define xacos_u1 nsimd_sleef_acos_u10d_vmx_f64
+#define xacosf_u1 nsimd_sleef_acos_u10d_vmx_f32
+#define xatan_u1 nsimd_sleef_atan_u10d_vmx_f64
+#define xatanf_u1 nsimd_sleef_atan_u10d_vmx_f32
+#define xatan2_u1 nsimd_sleef_atan2_u10d_vmx_f64
+#define xatan2f_u1 nsimd_sleef_atan2_u10d_vmx_f32
+#define xlog_u1 nsimd_sleef_log_u10d_vmx_f64
+#define xlogf_u1 nsimd_sleef_log_u10d_vmx_f32
+#define xcbrt_u1 nsimd_sleef_cbrt_u10d_vmx_f64
+#define xcbrtf_u1 nsimd_sleef_cbrt_u10d_vmx_f32
+#define xexp nsimd_sleef_exp_u10d_vmx_f64
+#define xexpf nsimd_sleef_exp_u10d_vmx_f32
+#define xpow nsimd_sleef_pow_u10d_vmx_f64
+#define xpowf nsimd_sleef_pow_u10d_vmx_f32
+#define xsinh nsimd_sleef_sinh_u10d_vmx_f64
+#define xsinhf nsimd_sleef_sinh_u10d_vmx_f32
+#define xcosh nsimd_sleef_cosh_u10d_vmx_f64
+#define xcoshf nsimd_sleef_cosh_u10d_vmx_f32
+#define xtanh nsimd_sleef_tanh_u10d_vmx_f64
+#define xtanhf nsimd_sleef_tanh_u10d_vmx_f32
+#define xsinh_u35 nsimd_sleef_sinh_u35d_vmx_f64
+#define xsinhf_u35 nsimd_sleef_sinh_u35d_vmx_f32
+#define xcosh_u35 nsimd_sleef_cosh_u35d_vmx_f64
+#define xcoshf_u35 nsimd_sleef_cosh_u35d_vmx_f32
+#define xtanh_u35 nsimd_sleef_tanh_u35d_vmx_f64
+#define xtanhf_u35 nsimd_sleef_tanh_u35d_vmx_f32
+#define xfastsin_u3500 nsimd_sleef_fastsin_u3500d_vmx_f64
+#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500d_vmx_f32
+#define xfastcos_u3500 nsimd_sleef_fastcos_u3500d_vmx_f64
+#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500d_vmx_f32
+#define xfastpow_u3500 nsimd_sleef_fastpow_u3500d_vmx_f64
+#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500d_vmx_f32
+#define xasinh nsimd_sleef_asinh_u10d_vmx_f64
+#define xasinhf nsimd_sleef_asinh_u10d_vmx_f32
+#define xacosh nsimd_sleef_acosh_u10d_vmx_f64
+#define xacoshf nsimd_sleef_acosh_u10d_vmx_f32
+#define xatanh
nsimd_sleef_atanh_u10d_vmx_f64 +#define xatanhf nsimd_sleef_atanh_u10d_vmx_f32 +#define xexp2 nsimd_sleef_exp2_u10d_vmx_f64 +#define xexp2f nsimd_sleef_exp2_u10d_vmx_f32 +#define xexp2_u35 nsimd_sleef_exp2_u35d_vmx_f64 +#define xexp2f_u35 nsimd_sleef_exp2_u35d_vmx_f32 +#define xexp10 nsimd_sleef_exp10_u10d_vmx_f64 +#define xexp10f nsimd_sleef_exp10_u10d_vmx_f32 +#define xexp10_u35 nsimd_sleef_exp10_u35d_vmx_f64 +#define xexp10f_u35 nsimd_sleef_exp10_u35d_vmx_f32 +#define xexpm1 nsimd_sleef_expm1_u10d_vmx_f64 +#define xexpm1f nsimd_sleef_expm1_u10d_vmx_f32 +#define xlog10 nsimd_sleef_log10_u10d_vmx_f64 +#define xlog10f nsimd_sleef_log10_u10d_vmx_f32 +#define xlog2 nsimd_sleef_log2_u10d_vmx_f64 +#define xlog2f nsimd_sleef_log2_u10d_vmx_f32 +#define xlog2_u35 nsimd_sleef_log2_u35d_vmx_f64 +#define xlog2f_u35 nsimd_sleef_log2_u35d_vmx_f32 +#define xlog1p nsimd_sleef_log1p_u10d_vmx_f64 +#define xlog1pf nsimd_sleef_log1p_u10d_vmx_f32 +#define xsincospi_u05 nsimd_sleef_sincospi_u05d_vmx_f64 +#define xsincospif_u05 nsimd_sleef_sincospi_u05d_vmx_f32 +#define xsincospi_u35 nsimd_sleef_sincospi_u35d_vmx_f64 +#define xsincospif_u35 nsimd_sleef_sincospi_u35d_vmx_f32 +#define xsinpi_u05 nsimd_sleef_sinpi_u05d_vmx_f64 +#define xsinpif_u05 nsimd_sleef_sinpi_u05d_vmx_f32 +#define xcospi_u05 nsimd_sleef_cospi_u05d_vmx_f64 +#define xcospif_u05 nsimd_sleef_cospi_u05d_vmx_f32 +#define xldexp nsimd_sleef_ldexp_vmx_f64 +#define xldexpf nsimd_sleef_ldexp_vmx_f32 +#define xilogb nsimd_sleef_ilogb_vmx_f64 +#define xilogbf nsimd_sleef_ilogb_vmx_f32 +#define xfma nsimd_sleef_fma_vmx_f64 +#define xfmaf nsimd_sleef_fma_vmx_f32 +#define xsqrt nsimd_sleef_sqrt_vmx_f64 +#define xsqrtf nsimd_sleef_sqrt_vmx_f32 +#define xsqrt_u05 nsimd_sleef_sqrt_u05d_vmx_f64 +#define xsqrtf_u05 nsimd_sleef_sqrt_u05d_vmx_f32 +#define xsqrt_u35 nsimd_sleef_sqrt_u35d_vmx_f64 +#define xsqrtf_u35 nsimd_sleef_sqrt_u35d_vmx_f32 +#define xhypot_u05 nsimd_sleef_hypot_u05d_vmx_f64 +#define xhypotf_u05 nsimd_sleef_hypot_u05d_vmx_f32 +#define xhypot_u35 nsimd_sleef_hypot_u35d_vmx_f64 +#define xhypotf_u35 nsimd_sleef_hypot_u35d_vmx_f32 +#define xfabs nsimd_sleef_fabs_vmx_f64 +#define xfabsf nsimd_sleef_fabs_vmx_f32 +#define xcopysign nsimd_sleef_copysign_vmx_f64 +#define xcopysignf nsimd_sleef_copysign_vmx_f32 +#define xfmax nsimd_sleef_fmax_vmx_f64 +#define xfmaxf nsimd_sleef_fmax_vmx_f32 +#define xfmin nsimd_sleef_fmin_vmx_f64 +#define xfminf nsimd_sleef_fmin_vmx_f32 +#define xfdim nsimd_sleef_fdim_vmx_f64 +#define xfdimf nsimd_sleef_fdim_vmx_f32 +#define xtrunc nsimd_sleef_trunc_vmx_f64 +#define xtruncf nsimd_sleef_trunc_vmx_f32 +#define xfloor nsimd_sleef_floor_vmx_f64 +#define xfloorf nsimd_sleef_floor_vmx_f32 +#define xceil nsimd_sleef_ceil_vmx_f64 +#define xceilf nsimd_sleef_ceil_vmx_f32 +#define xround nsimd_sleef_round_vmx_f64 +#define xroundf nsimd_sleef_round_vmx_f32 +#define xrint nsimd_sleef_rint_vmx_f64 +#define xrintf nsimd_sleef_rint_vmx_f32 +#define xnextafter nsimd_sleef_nextafter_vmx_f64 +#define xnextafterf nsimd_sleef_nextafter_vmx_f32 +#define xfrfrexp nsimd_sleef_frfrexp_vmx_f64 +#define xfrfrexpf nsimd_sleef_frfrexp_vmx_f32 +#define xexpfrexp nsimd_sleef_expfrexp_vmx_f64 +#define xexpfrexpf nsimd_sleef_expfrexp_vmx_f32 +#define xfmod nsimd_sleef_fmod_vmx_f64 +#define xfmodf nsimd_sleef_fmod_vmx_f32 +#define xremainder nsimd_sleef_remainder_vmx_f64 +#define xremainderf nsimd_sleef_remainder_vmx_f32 +#define xmodf nsimd_sleef_modf_vmx_f64 +#define xmodff nsimd_sleef_modf_vmx_f32 +#define xlgamma_u1 nsimd_sleef_lgamma_u10d_vmx_f64 
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10d_vmx_f32 +#define xtgamma_u1 nsimd_sleef_tgamma_u10d_vmx_f64 +#define xtgammaf_u1 nsimd_sleef_tgamma_u10d_vmx_f32 +#define xerf_u1 nsimd_sleef_erf_u10d_vmx_f64 +#define xerff_u1 nsimd_sleef_erf_u10d_vmx_f32 +#define xerfc_u15 nsimd_sleef_erfc_u15d_vmx_f64 +#define xerfcf_u15 nsimd_sleef_erfc_u15d_vmx_f32 +#define xgetInt nsimd_sleef_getInt_vmx_f64 +#define xgetIntf nsimd_sleef_getInt_vmx_f32 +#define xgetPtr nsimd_sleef_getPtr_vmx_f64 +#define xgetPtrf nsimd_sleef_getPtr_vmx_f32 + + #else + + #define xsin nsimd_sleef_sin_u35_vmx_f64 +#define xsinf nsimd_sleef_sin_u35_vmx_f32 +#define xcos nsimd_sleef_cos_u35_vmx_f64 +#define xcosf nsimd_sleef_cos_u35_vmx_f32 +#define xsincos nsimd_sleef_sincos_u35_vmx_f64 +#define xsincosf nsimd_sleef_sincos_u35_vmx_f32 +#define xtan nsimd_sleef_tan_u35_vmx_f64 +#define xtanf nsimd_sleef_tan_u35_vmx_f32 +#define xasin nsimd_sleef_asin_u35_vmx_f64 +#define xasinf nsimd_sleef_asin_u35_vmx_f32 +#define xacos nsimd_sleef_acos_u35_vmx_f64 +#define xacosf nsimd_sleef_acos_u35_vmx_f32 +#define xatan nsimd_sleef_atan_u35_vmx_f64 +#define xatanf nsimd_sleef_atan_u35_vmx_f32 +#define xatan2 nsimd_sleef_atan2_u35_vmx_f64 +#define xatan2f nsimd_sleef_atan2_u35_vmx_f32 +#define xlog nsimd_sleef_log_u35_vmx_f64 +#define xlogf nsimd_sleef_log_u35_vmx_f32 +#define xcbrt nsimd_sleef_cbrt_u35_vmx_f64 +#define xcbrtf nsimd_sleef_cbrt_u35_vmx_f32 +#define xsin_u1 nsimd_sleef_sin_u10_vmx_f64 +#define xsinf_u1 nsimd_sleef_sin_u10_vmx_f32 +#define xcos_u1 nsimd_sleef_cos_u10_vmx_f64 +#define xcosf_u1 nsimd_sleef_cos_u10_vmx_f32 +#define xsincos_u1 nsimd_sleef_sincos_u10_vmx_f64 +#define xsincosf_u1 nsimd_sleef_sincos_u10_vmx_f32 +#define xtan_u1 nsimd_sleef_tan_u10_vmx_f64 +#define xtanf_u1 nsimd_sleef_tan_u10_vmx_f32 +#define xasin_u1 nsimd_sleef_asin_u10_vmx_f64 +#define xasinf_u1 nsimd_sleef_asin_u10_vmx_f32 +#define xacos_u1 nsimd_sleef_acos_u10_vmx_f64 +#define xacosf_u1 nsimd_sleef_acos_u10_vmx_f32 +#define xatan_u1 nsimd_sleef_atan_u10_vmx_f64 +#define xatanf_u1 nsimd_sleef_atan_u10_vmx_f32 +#define xatan2_u1 nsimd_sleef_atan2_u10_vmx_f64 +#define xatan2f_u1 nsimd_sleef_atan2_u10_vmx_f32 +#define xlog_u1 nsimd_sleef_log_u10_vmx_f64 +#define xlogf_u1 nsimd_sleef_log_u10_vmx_f32 +#define xcbrt_u1 nsimd_sleef_cbrt_u10_vmx_f64 +#define xcbrtf_u1 nsimd_sleef_cbrt_u10_vmx_f32 +#define xexp nsimd_sleef_exp_u10_vmx_f64 +#define xexpf nsimd_sleef_exp_u10_vmx_f32 +#define xpow nsimd_sleef_pow_u10_vmx_f64 +#define xpowf nsimd_sleef_pow_u10_vmx_f32 +#define xsinh nsimd_sleef_sinh_u10_vmx_f64 +#define xsinhf nsimd_sleef_sinh_u10_vmx_f32 +#define xcosh nsimd_sleef_cosh_u10_vmx_f64 +#define xcoshf nsimd_sleef_cosh_u10_vmx_f32 +#define xtanh nsimd_sleef_tanh_u10_vmx_f64 +#define xtanhf nsimd_sleef_tanh_u10_vmx_f32 +#define xsinh_u35 nsimd_sleef_sinh_u35_vmx_f64 +#define xsinhf_u35 nsimd_sleef_sinh_u35_vmx_f32 +#define xcosh_u35 nsimd_sleef_cosh_u35_vmx_f64 +#define xcoshf_u35 nsimd_sleef_cosh_u35_vmx_f32 +#define xtanh_u35 nsimd_sleef_tanh_u35_vmx_f64 +#define xtanhf_u35 nsimd_sleef_tanh_u35_vmx_f32 +#define xfastsin_u3500 nsimd_sleef_fastsin_u3500_vmx_f64 +#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500_vmx_f32 +#define xfastcos_u3500 nsimd_sleef_fastcos_u3500_vmx_f64 +#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500_vmx_f32 +#define xfastpow_u3500 nsimd_sleef_fastpow_u3500_vmx_f64 +#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500_vmx_f32 +#define xasinh nsimd_sleef_asinh_u10_vmx_f64 +#define xasinhf nsimd_sleef_asinh_u10_vmx_f32 +#define 
xacosh nsimd_sleef_acosh_u10_vmx_f64 +#define xacoshf nsimd_sleef_acosh_u10_vmx_f32 +#define xatanh nsimd_sleef_atanh_u10_vmx_f64 +#define xatanhf nsimd_sleef_atanh_u10_vmx_f32 +#define xexp2 nsimd_sleef_exp2_u10_vmx_f64 +#define xexp2f nsimd_sleef_exp2_u10_vmx_f32 +#define xexp2_u35 nsimd_sleef_exp2_u35_vmx_f64 +#define xexp2f_u35 nsimd_sleef_exp2_u35_vmx_f32 +#define xexp10 nsimd_sleef_exp10_u10_vmx_f64 +#define xexp10f nsimd_sleef_exp10_u10_vmx_f32 +#define xexp10_u35 nsimd_sleef_exp10_u35_vmx_f64 +#define xexp10f_u35 nsimd_sleef_exp10_u35_vmx_f32 +#define xexpm1 nsimd_sleef_expm1_u10_vmx_f64 +#define xexpm1f nsimd_sleef_expm1_u10_vmx_f32 +#define xlog10 nsimd_sleef_log10_u10_vmx_f64 +#define xlog10f nsimd_sleef_log10_u10_vmx_f32 +#define xlog2 nsimd_sleef_log2_u10_vmx_f64 +#define xlog2f nsimd_sleef_log2_u10_vmx_f32 +#define xlog2_u35 nsimd_sleef_log2_u35_vmx_f64 +#define xlog2f_u35 nsimd_sleef_log2_u35_vmx_f32 +#define xlog1p nsimd_sleef_log1p_u10_vmx_f64 +#define xlog1pf nsimd_sleef_log1p_u10_vmx_f32 +#define xsincospi_u05 nsimd_sleef_sincospi_u05_vmx_f64 +#define xsincospif_u05 nsimd_sleef_sincospi_u05_vmx_f32 +#define xsincospi_u35 nsimd_sleef_sincospi_u35_vmx_f64 +#define xsincospif_u35 nsimd_sleef_sincospi_u35_vmx_f32 +#define xsinpi_u05 nsimd_sleef_sinpi_u05_vmx_f64 +#define xsinpif_u05 nsimd_sleef_sinpi_u05_vmx_f32 +#define xcospi_u05 nsimd_sleef_cospi_u05_vmx_f64 +#define xcospif_u05 nsimd_sleef_cospi_u05_vmx_f32 +#define xldexp nsimd_sleef_ldexp_vmx_f64 +#define xldexpf nsimd_sleef_ldexp_vmx_f32 +#define xilogb nsimd_sleef_ilogb_vmx_f64 +#define xilogbf nsimd_sleef_ilogb_vmx_f32 +#define xfma nsimd_sleef_fma_vmx_f64 +#define xfmaf nsimd_sleef_fma_vmx_f32 +#define xsqrt nsimd_sleef_sqrt_vmx_f64 +#define xsqrtf nsimd_sleef_sqrt_vmx_f32 +#define xsqrt_u05 nsimd_sleef_sqrt_u05_vmx_f64 +#define xsqrtf_u05 nsimd_sleef_sqrt_u05_vmx_f32 +#define xsqrt_u35 nsimd_sleef_sqrt_u35_vmx_f64 +#define xsqrtf_u35 nsimd_sleef_sqrt_u35_vmx_f32 +#define xhypot_u05 nsimd_sleef_hypot_u05_vmx_f64 +#define xhypotf_u05 nsimd_sleef_hypot_u05_vmx_f32 +#define xhypot_u35 nsimd_sleef_hypot_u35_vmx_f64 +#define xhypotf_u35 nsimd_sleef_hypot_u35_vmx_f32 +#define xfabs nsimd_sleef_fabs_vmx_f64 +#define xfabsf nsimd_sleef_fabs_vmx_f32 +#define xcopysign nsimd_sleef_copysign_vmx_f64 +#define xcopysignf nsimd_sleef_copysign_vmx_f32 +#define xfmax nsimd_sleef_fmax_vmx_f64 +#define xfmaxf nsimd_sleef_fmax_vmx_f32 +#define xfmin nsimd_sleef_fmin_vmx_f64 +#define xfminf nsimd_sleef_fmin_vmx_f32 +#define xfdim nsimd_sleef_fdim_vmx_f64 +#define xfdimf nsimd_sleef_fdim_vmx_f32 +#define xtrunc nsimd_sleef_trunc_vmx_f64 +#define xtruncf nsimd_sleef_trunc_vmx_f32 +#define xfloor nsimd_sleef_floor_vmx_f64 +#define xfloorf nsimd_sleef_floor_vmx_f32 +#define xceil nsimd_sleef_ceil_vmx_f64 +#define xceilf nsimd_sleef_ceil_vmx_f32 +#define xround nsimd_sleef_round_vmx_f64 +#define xroundf nsimd_sleef_round_vmx_f32 +#define xrint nsimd_sleef_rint_vmx_f64 +#define xrintf nsimd_sleef_rint_vmx_f32 +#define xnextafter nsimd_sleef_nextafter_vmx_f64 +#define xnextafterf nsimd_sleef_nextafter_vmx_f32 +#define xfrfrexp nsimd_sleef_frfrexp_vmx_f64 +#define xfrfrexpf nsimd_sleef_frfrexp_vmx_f32 +#define xexpfrexp nsimd_sleef_expfrexp_vmx_f64 +#define xexpfrexpf nsimd_sleef_expfrexp_vmx_f32 +#define xfmod nsimd_sleef_fmod_vmx_f64 +#define xfmodf nsimd_sleef_fmod_vmx_f32 +#define xremainder nsimd_sleef_remainder_vmx_f64 +#define xremainderf nsimd_sleef_remainder_vmx_f32 +#define xmodf nsimd_sleef_modf_vmx_f64 +#define xmodff 
nsimd_sleef_modf_vmx_f32 +#define xlgamma_u1 nsimd_sleef_lgamma_u10_vmx_f64 +#define xlgammaf_u1 nsimd_sleef_lgamma_u10_vmx_f32 +#define xtgamma_u1 nsimd_sleef_tgamma_u10_vmx_f64 +#define xtgammaf_u1 nsimd_sleef_tgamma_u10_vmx_f32 +#define xerf_u1 nsimd_sleef_erf_u10_vmx_f64 +#define xerff_u1 nsimd_sleef_erf_u10_vmx_f32 +#define xerfc_u15 nsimd_sleef_erfc_u15_vmx_f64 +#define xerfcf_u15 nsimd_sleef_erfc_u15_vmx_f32 +#define xgetInt nsimd_sleef_getInt_vmx_f64 +#define xgetIntf nsimd_sleef_getInt_vmx_f32 +#define xgetPtr nsimd_sleef_getPtr_vmx_f64 +#define xgetPtrf nsimd_sleef_getPtr_vmx_f32 + + #endif + + #define rempi nsimd_sleef_rempi_vmx + #define rempif nsimd_sleef_rempif_vmx + #define rempisub nsimd_sleef_rempisub_vmx + #define rempisubf nsimd_sleef_rempisubf_vmx + #define gammak nsimd_gammak_vmx + #define gammafk nsimd_gammafk_vmx + + #endif + + /* ------------------------------------------------------------------------- */ + /* Naming of functions vsx */ + + #ifdef NSIMD_VSX + + #ifdef DETERMINISTIC + + #define xsin nsimd_sleef_sin_u35d_vsx_f64 +#define xsinf nsimd_sleef_sin_u35d_vsx_f32 +#define xcos nsimd_sleef_cos_u35d_vsx_f64 +#define xcosf nsimd_sleef_cos_u35d_vsx_f32 +#define xsincos nsimd_sleef_sincos_u35d_vsx_f64 +#define xsincosf nsimd_sleef_sincos_u35d_vsx_f32 +#define xtan nsimd_sleef_tan_u35d_vsx_f64 +#define xtanf nsimd_sleef_tan_u35d_vsx_f32 +#define xasin nsimd_sleef_asin_u35d_vsx_f64 +#define xasinf nsimd_sleef_asin_u35d_vsx_f32 +#define xacos nsimd_sleef_acos_u35d_vsx_f64 +#define xacosf nsimd_sleef_acos_u35d_vsx_f32 +#define xatan nsimd_sleef_atan_u35d_vsx_f64 +#define xatanf nsimd_sleef_atan_u35d_vsx_f32 +#define xatan2 nsimd_sleef_atan2_u35d_vsx_f64 +#define xatan2f nsimd_sleef_atan2_u35d_vsx_f32 +#define xlog nsimd_sleef_log_u35d_vsx_f64 +#define xlogf nsimd_sleef_log_u35d_vsx_f32 +#define xcbrt nsimd_sleef_cbrt_u35d_vsx_f64 +#define xcbrtf nsimd_sleef_cbrt_u35d_vsx_f32 +#define xsin_u1 nsimd_sleef_sin_u10d_vsx_f64 +#define xsinf_u1 nsimd_sleef_sin_u10d_vsx_f32 +#define xcos_u1 nsimd_sleef_cos_u10d_vsx_f64 +#define xcosf_u1 nsimd_sleef_cos_u10d_vsx_f32 +#define xsincos_u1 nsimd_sleef_sincos_u10d_vsx_f64 +#define xsincosf_u1 nsimd_sleef_sincos_u10d_vsx_f32 +#define xtan_u1 nsimd_sleef_tan_u10d_vsx_f64 +#define xtanf_u1 nsimd_sleef_tan_u10d_vsx_f32 +#define xasin_u1 nsimd_sleef_asin_u10d_vsx_f64 +#define xasinf_u1 nsimd_sleef_asin_u10d_vsx_f32 +#define xacos_u1 nsimd_sleef_acos_u10d_vsx_f64 +#define xacosf_u1 nsimd_sleef_acos_u10d_vsx_f32 +#define xatan_u1 nsimd_sleef_atan_u10d_vsx_f64 +#define xatanf_u1 nsimd_sleef_atan_u10d_vsx_f32 +#define xatan2_u1 nsimd_sleef_atan2_u10d_vsx_f64 +#define xatan2f_u1 nsimd_sleef_atan2_u10d_vsx_f32 +#define xlog_u1 nsimd_sleef_log_u10d_vsx_f64 +#define xlogf_u1 nsimd_sleef_log_u10d_vsx_f32 +#define xcbrt_u1 nsimd_sleef_cbrt_u10d_vsx_f64 +#define xcbrtf_u1 nsimd_sleef_cbrt_u10d_vsx_f32 +#define xexp nsimd_sleef_exp_u10d_vsx_f64 +#define xexpf nsimd_sleef_exp_u10d_vsx_f32 +#define xpow nsimd_sleef_pow_u10d_vsx_f64 +#define xpowf nsimd_sleef_pow_u10d_vsx_f32 +#define xsinh nsimd_sleef_sinh_u10d_vsx_f64 +#define xsinhf nsimd_sleef_sinh_u10d_vsx_f32 +#define xcosh nsimd_sleef_cosh_u10d_vsx_f64 +#define xcoshf nsimd_sleef_cosh_u10d_vsx_f32 +#define xtanh nsimd_sleef_tanh_u10d_vsx_f64 +#define xtanhf nsimd_sleef_tanh_u10d_vsx_f32 +#define xsinh_u35 nsimd_sleef_sinh_u35d_vsx_f64 +#define xsinhf_u35 nsimd_sleef_sinh_u35d_vsx_f32 +#define xcosh_u35 nsimd_sleef_cosh_u35d_vsx_f64 +#define xcoshf_u35 nsimd_sleef_cosh_u35d_vsx_f32 +#define 
xtanh_u35 nsimd_sleef_tanh_u35d_vsx_f64 +#define xtanhf_u35 nsimd_sleef_tanh_u35d_vsx_f32 +#define xfastsin_u3500 nsimd_sleef_fastsin_u3500d_vsx_f64 +#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500d_vsx_f32 +#define xfastcos_u3500 nsimd_sleef_fastcos_u3500d_vsx_f64 +#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500d_vsx_f32 +#define xfastpow_u3500 nsimd_sleef_fastpow_u3500d_vsx_f64 +#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500d_vsx_f32 +#define xasinh nsimd_sleef_asinh_u10d_vsx_f64 +#define xasinhf nsimd_sleef_asinh_u10d_vsx_f32 +#define xacosh nsimd_sleef_acosh_u10d_vsx_f64 +#define xacoshf nsimd_sleef_acosh_u10d_vsx_f32 +#define xatanh nsimd_sleef_atanh_u10d_vsx_f64 +#define xatanhf nsimd_sleef_atanh_u10d_vsx_f32 +#define xexp2 nsimd_sleef_exp2_u10d_vsx_f64 +#define xexp2f nsimd_sleef_exp2_u10d_vsx_f32 +#define xexp2_u35 nsimd_sleef_exp2_u35d_vsx_f64 +#define xexp2f_u35 nsimd_sleef_exp2_u35d_vsx_f32 +#define xexp10 nsimd_sleef_exp10_u10d_vsx_f64 +#define xexp10f nsimd_sleef_exp10_u10d_vsx_f32 +#define xexp10_u35 nsimd_sleef_exp10_u35d_vsx_f64 +#define xexp10f_u35 nsimd_sleef_exp10_u35d_vsx_f32 +#define xexpm1 nsimd_sleef_expm1_u10d_vsx_f64 +#define xexpm1f nsimd_sleef_expm1_u10d_vsx_f32 +#define xlog10 nsimd_sleef_log10_u10d_vsx_f64 +#define xlog10f nsimd_sleef_log10_u10d_vsx_f32 +#define xlog2 nsimd_sleef_log2_u10d_vsx_f64 +#define xlog2f nsimd_sleef_log2_u10d_vsx_f32 +#define xlog2_u35 nsimd_sleef_log2_u35d_vsx_f64 +#define xlog2f_u35 nsimd_sleef_log2_u35d_vsx_f32 +#define xlog1p nsimd_sleef_log1p_u10d_vsx_f64 +#define xlog1pf nsimd_sleef_log1p_u10d_vsx_f32 +#define xsincospi_u05 nsimd_sleef_sincospi_u05d_vsx_f64 +#define xsincospif_u05 nsimd_sleef_sincospi_u05d_vsx_f32 +#define xsincospi_u35 nsimd_sleef_sincospi_u35d_vsx_f64 +#define xsincospif_u35 nsimd_sleef_sincospi_u35d_vsx_f32 +#define xsinpi_u05 nsimd_sleef_sinpi_u05d_vsx_f64 +#define xsinpif_u05 nsimd_sleef_sinpi_u05d_vsx_f32 +#define xcospi_u05 nsimd_sleef_cospi_u05d_vsx_f64 +#define xcospif_u05 nsimd_sleef_cospi_u05d_vsx_f32 +#define xldexp nsimd_sleef_ldexp_vsx_f64 +#define xldexpf nsimd_sleef_ldexp_vsx_f32 +#define xilogb nsimd_sleef_ilogb_vsx_f64 +#define xilogbf nsimd_sleef_ilogb_vsx_f32 +#define xfma nsimd_sleef_fma_vsx_f64 +#define xfmaf nsimd_sleef_fma_vsx_f32 +#define xsqrt nsimd_sleef_sqrt_vsx_f64 +#define xsqrtf nsimd_sleef_sqrt_vsx_f32 +#define xsqrt_u05 nsimd_sleef_sqrt_u05d_vsx_f64 +#define xsqrtf_u05 nsimd_sleef_sqrt_u05d_vsx_f32 +#define xsqrt_u35 nsimd_sleef_sqrt_u35d_vsx_f64 +#define xsqrtf_u35 nsimd_sleef_sqrt_u35d_vsx_f32 +#define xhypot_u05 nsimd_sleef_hypot_u05d_vsx_f64 +#define xhypotf_u05 nsimd_sleef_hypot_u05d_vsx_f32 +#define xhypot_u35 nsimd_sleef_hypot_u35d_vsx_f64 +#define xhypotf_u35 nsimd_sleef_hypot_u35d_vsx_f32 +#define xfabs nsimd_sleef_fabs_vsx_f64 +#define xfabsf nsimd_sleef_fabs_vsx_f32 +#define xcopysign nsimd_sleef_copysign_vsx_f64 +#define xcopysignf nsimd_sleef_copysign_vsx_f32 +#define xfmax nsimd_sleef_fmax_vsx_f64 +#define xfmaxf nsimd_sleef_fmax_vsx_f32 +#define xfmin nsimd_sleef_fmin_vsx_f64 +#define xfminf nsimd_sleef_fmin_vsx_f32 +#define xfdim nsimd_sleef_fdim_vsx_f64 +#define xfdimf nsimd_sleef_fdim_vsx_f32 +#define xtrunc nsimd_sleef_trunc_vsx_f64 +#define xtruncf nsimd_sleef_trunc_vsx_f32 +#define xfloor nsimd_sleef_floor_vsx_f64 +#define xfloorf nsimd_sleef_floor_vsx_f32 +#define xceil nsimd_sleef_ceil_vsx_f64 +#define xceilf nsimd_sleef_ceil_vsx_f32 +#define xround nsimd_sleef_round_vsx_f64 +#define xroundf nsimd_sleef_round_vsx_f32 +#define xrint 
nsimd_sleef_rint_vsx_f64 +#define xrintf nsimd_sleef_rint_vsx_f32 +#define xnextafter nsimd_sleef_nextafter_vsx_f64 +#define xnextafterf nsimd_sleef_nextafter_vsx_f32 +#define xfrfrexp nsimd_sleef_frfrexp_vsx_f64 +#define xfrfrexpf nsimd_sleef_frfrexp_vsx_f32 +#define xexpfrexp nsimd_sleef_expfrexp_vsx_f64 +#define xexpfrexpf nsimd_sleef_expfrexp_vsx_f32 +#define xfmod nsimd_sleef_fmod_vsx_f64 +#define xfmodf nsimd_sleef_fmod_vsx_f32 +#define xremainder nsimd_sleef_remainder_vsx_f64 +#define xremainderf nsimd_sleef_remainder_vsx_f32 +#define xmodf nsimd_sleef_modf_vsx_f64 +#define xmodff nsimd_sleef_modf_vsx_f32 +#define xlgamma_u1 nsimd_sleef_lgamma_u10d_vsx_f64 +#define xlgammaf_u1 nsimd_sleef_lgamma_u10d_vsx_f32 +#define xtgamma_u1 nsimd_sleef_tgamma_u10d_vsx_f64 +#define xtgammaf_u1 nsimd_sleef_tgamma_u10d_vsx_f32 +#define xerf_u1 nsimd_sleef_erf_u10d_vsx_f64 +#define xerff_u1 nsimd_sleef_erf_u10d_vsx_f32 +#define xerfc_u15 nsimd_sleef_erfc_u15d_vsx_f64 +#define xerfcf_u15 nsimd_sleef_erfc_u15d_vsx_f32 +#define xgetInt nsimd_sleef_getInt_vsx_f64 +#define xgetIntf nsimd_sleef_getInt_vsx_f32 +#define xgetPtr nsimd_sleef_getPtr_vsx_f64 +#define xgetPtrf nsimd_sleef_getPtr_vsx_f32 + + #else + + #define xsin nsimd_sleef_sin_u35_vsx_f64 +#define xsinf nsimd_sleef_sin_u35_vsx_f32 +#define xcos nsimd_sleef_cos_u35_vsx_f64 +#define xcosf nsimd_sleef_cos_u35_vsx_f32 +#define xsincos nsimd_sleef_sincos_u35_vsx_f64 +#define xsincosf nsimd_sleef_sincos_u35_vsx_f32 +#define xtan nsimd_sleef_tan_u35_vsx_f64 +#define xtanf nsimd_sleef_tan_u35_vsx_f32 +#define xasin nsimd_sleef_asin_u35_vsx_f64 +#define xasinf nsimd_sleef_asin_u35_vsx_f32 +#define xacos nsimd_sleef_acos_u35_vsx_f64 +#define xacosf nsimd_sleef_acos_u35_vsx_f32 +#define xatan nsimd_sleef_atan_u35_vsx_f64 +#define xatanf nsimd_sleef_atan_u35_vsx_f32 +#define xatan2 nsimd_sleef_atan2_u35_vsx_f64 +#define xatan2f nsimd_sleef_atan2_u35_vsx_f32 +#define xlog nsimd_sleef_log_u35_vsx_f64 +#define xlogf nsimd_sleef_log_u35_vsx_f32 +#define xcbrt nsimd_sleef_cbrt_u35_vsx_f64 +#define xcbrtf nsimd_sleef_cbrt_u35_vsx_f32 +#define xsin_u1 nsimd_sleef_sin_u10_vsx_f64 +#define xsinf_u1 nsimd_sleef_sin_u10_vsx_f32 +#define xcos_u1 nsimd_sleef_cos_u10_vsx_f64 +#define xcosf_u1 nsimd_sleef_cos_u10_vsx_f32 +#define xsincos_u1 nsimd_sleef_sincos_u10_vsx_f64 +#define xsincosf_u1 nsimd_sleef_sincos_u10_vsx_f32 +#define xtan_u1 nsimd_sleef_tan_u10_vsx_f64 +#define xtanf_u1 nsimd_sleef_tan_u10_vsx_f32 +#define xasin_u1 nsimd_sleef_asin_u10_vsx_f64 +#define xasinf_u1 nsimd_sleef_asin_u10_vsx_f32 +#define xacos_u1 nsimd_sleef_acos_u10_vsx_f64 +#define xacosf_u1 nsimd_sleef_acos_u10_vsx_f32 +#define xatan_u1 nsimd_sleef_atan_u10_vsx_f64 +#define xatanf_u1 nsimd_sleef_atan_u10_vsx_f32 +#define xatan2_u1 nsimd_sleef_atan2_u10_vsx_f64 +#define xatan2f_u1 nsimd_sleef_atan2_u10_vsx_f32 +#define xlog_u1 nsimd_sleef_log_u10_vsx_f64 +#define xlogf_u1 nsimd_sleef_log_u10_vsx_f32 +#define xcbrt_u1 nsimd_sleef_cbrt_u10_vsx_f64 +#define xcbrtf_u1 nsimd_sleef_cbrt_u10_vsx_f32 +#define xexp nsimd_sleef_exp_u10_vsx_f64 +#define xexpf nsimd_sleef_exp_u10_vsx_f32 +#define xpow nsimd_sleef_pow_u10_vsx_f64 +#define xpowf nsimd_sleef_pow_u10_vsx_f32 +#define xsinh nsimd_sleef_sinh_u10_vsx_f64 +#define xsinhf nsimd_sleef_sinh_u10_vsx_f32 +#define xcosh nsimd_sleef_cosh_u10_vsx_f64 +#define xcoshf nsimd_sleef_cosh_u10_vsx_f32 +#define xtanh nsimd_sleef_tanh_u10_vsx_f64 +#define xtanhf nsimd_sleef_tanh_u10_vsx_f32 +#define xsinh_u35 nsimd_sleef_sinh_u35_vsx_f64 +#define xsinhf_u35 
nsimd_sleef_sinh_u35_vsx_f32 +#define xcosh_u35 nsimd_sleef_cosh_u35_vsx_f64 +#define xcoshf_u35 nsimd_sleef_cosh_u35_vsx_f32 +#define xtanh_u35 nsimd_sleef_tanh_u35_vsx_f64 +#define xtanhf_u35 nsimd_sleef_tanh_u35_vsx_f32 +#define xfastsin_u3500 nsimd_sleef_fastsin_u3500_vsx_f64 +#define xfastsinf_u3500 nsimd_sleef_fastsin_u3500_vsx_f32 +#define xfastcos_u3500 nsimd_sleef_fastcos_u3500_vsx_f64 +#define xfastcosf_u3500 nsimd_sleef_fastcos_u3500_vsx_f32 +#define xfastpow_u3500 nsimd_sleef_fastpow_u3500_vsx_f64 +#define xfastpowf_u3500 nsimd_sleef_fastpow_u3500_vsx_f32 +#define xasinh nsimd_sleef_asinh_u10_vsx_f64 +#define xasinhf nsimd_sleef_asinh_u10_vsx_f32 +#define xacosh nsimd_sleef_acosh_u10_vsx_f64 +#define xacoshf nsimd_sleef_acosh_u10_vsx_f32 +#define xatanh nsimd_sleef_atanh_u10_vsx_f64 +#define xatanhf nsimd_sleef_atanh_u10_vsx_f32 +#define xexp2 nsimd_sleef_exp2_u10_vsx_f64 +#define xexp2f nsimd_sleef_exp2_u10_vsx_f32 +#define xexp2_u35 nsimd_sleef_exp2_u35_vsx_f64 +#define xexp2f_u35 nsimd_sleef_exp2_u35_vsx_f32 +#define xexp10 nsimd_sleef_exp10_u10_vsx_f64 +#define xexp10f nsimd_sleef_exp10_u10_vsx_f32 +#define xexp10_u35 nsimd_sleef_exp10_u35_vsx_f64 +#define xexp10f_u35 nsimd_sleef_exp10_u35_vsx_f32 +#define xexpm1 nsimd_sleef_expm1_u10_vsx_f64 +#define xexpm1f nsimd_sleef_expm1_u10_vsx_f32 +#define xlog10 nsimd_sleef_log10_u10_vsx_f64 +#define xlog10f nsimd_sleef_log10_u10_vsx_f32 +#define xlog2 nsimd_sleef_log2_u10_vsx_f64 +#define xlog2f nsimd_sleef_log2_u10_vsx_f32 +#define xlog2_u35 nsimd_sleef_log2_u35_vsx_f64 +#define xlog2f_u35 nsimd_sleef_log2_u35_vsx_f32 +#define xlog1p nsimd_sleef_log1p_u10_vsx_f64 +#define xlog1pf nsimd_sleef_log1p_u10_vsx_f32 +#define xsincospi_u05 nsimd_sleef_sincospi_u05_vsx_f64 +#define xsincospif_u05 nsimd_sleef_sincospi_u05_vsx_f32 +#define xsincospi_u35 nsimd_sleef_sincospi_u35_vsx_f64 +#define xsincospif_u35 nsimd_sleef_sincospi_u35_vsx_f32 +#define xsinpi_u05 nsimd_sleef_sinpi_u05_vsx_f64 +#define xsinpif_u05 nsimd_sleef_sinpi_u05_vsx_f32 +#define xcospi_u05 nsimd_sleef_cospi_u05_vsx_f64 +#define xcospif_u05 nsimd_sleef_cospi_u05_vsx_f32 +#define xldexp nsimd_sleef_ldexp_vsx_f64 +#define xldexpf nsimd_sleef_ldexp_vsx_f32 +#define xilogb nsimd_sleef_ilogb_vsx_f64 +#define xilogbf nsimd_sleef_ilogb_vsx_f32 +#define xfma nsimd_sleef_fma_vsx_f64 +#define xfmaf nsimd_sleef_fma_vsx_f32 +#define xsqrt nsimd_sleef_sqrt_vsx_f64 +#define xsqrtf nsimd_sleef_sqrt_vsx_f32 +#define xsqrt_u05 nsimd_sleef_sqrt_u05_vsx_f64 +#define xsqrtf_u05 nsimd_sleef_sqrt_u05_vsx_f32 +#define xsqrt_u35 nsimd_sleef_sqrt_u35_vsx_f64 +#define xsqrtf_u35 nsimd_sleef_sqrt_u35_vsx_f32 +#define xhypot_u05 nsimd_sleef_hypot_u05_vsx_f64 +#define xhypotf_u05 nsimd_sleef_hypot_u05_vsx_f32 +#define xhypot_u35 nsimd_sleef_hypot_u35_vsx_f64 +#define xhypotf_u35 nsimd_sleef_hypot_u35_vsx_f32 +#define xfabs nsimd_sleef_fabs_vsx_f64 +#define xfabsf nsimd_sleef_fabs_vsx_f32 +#define xcopysign nsimd_sleef_copysign_vsx_f64 +#define xcopysignf nsimd_sleef_copysign_vsx_f32 +#define xfmax nsimd_sleef_fmax_vsx_f64 +#define xfmaxf nsimd_sleef_fmax_vsx_f32 +#define xfmin nsimd_sleef_fmin_vsx_f64 +#define xfminf nsimd_sleef_fmin_vsx_f32 +#define xfdim nsimd_sleef_fdim_vsx_f64 +#define xfdimf nsimd_sleef_fdim_vsx_f32 +#define xtrunc nsimd_sleef_trunc_vsx_f64 +#define xtruncf nsimd_sleef_trunc_vsx_f32 +#define xfloor nsimd_sleef_floor_vsx_f64 +#define xfloorf nsimd_sleef_floor_vsx_f32 +#define xceil nsimd_sleef_ceil_vsx_f64 +#define xceilf nsimd_sleef_ceil_vsx_f32 +#define xround 
nsimd_sleef_round_vsx_f64
+#define xroundf nsimd_sleef_round_vsx_f32
+#define xrint nsimd_sleef_rint_vsx_f64
+#define xrintf nsimd_sleef_rint_vsx_f32
+#define xnextafter nsimd_sleef_nextafter_vsx_f64
+#define xnextafterf nsimd_sleef_nextafter_vsx_f32
+#define xfrfrexp nsimd_sleef_frfrexp_vsx_f64
+#define xfrfrexpf nsimd_sleef_frfrexp_vsx_f32
+#define xexpfrexp nsimd_sleef_expfrexp_vsx_f64
+#define xexpfrexpf nsimd_sleef_expfrexp_vsx_f32
+#define xfmod nsimd_sleef_fmod_vsx_f64
+#define xfmodf nsimd_sleef_fmod_vsx_f32
+#define xremainder nsimd_sleef_remainder_vsx_f64
+#define xremainderf nsimd_sleef_remainder_vsx_f32
+#define xmodf nsimd_sleef_modf_vsx_f64
+#define xmodff nsimd_sleef_modf_vsx_f32
+#define xlgamma_u1 nsimd_sleef_lgamma_u10_vsx_f64
+#define xlgammaf_u1 nsimd_sleef_lgamma_u10_vsx_f32
+#define xtgamma_u1 nsimd_sleef_tgamma_u10_vsx_f64
+#define xtgammaf_u1 nsimd_sleef_tgamma_u10_vsx_f32
+#define xerf_u1 nsimd_sleef_erf_u10_vsx_f64
+#define xerff_u1 nsimd_sleef_erf_u10_vsx_f32
+#define xerfc_u15 nsimd_sleef_erfc_u15_vsx_f64
+#define xerfcf_u15 nsimd_sleef_erfc_u15_vsx_f32
+#define xgetInt nsimd_sleef_getInt_vsx_f64
+#define xgetIntf nsimd_sleef_getInt_vsx_f32
+#define xgetPtr nsimd_sleef_getPtr_vsx_f64
+#define xgetPtrf nsimd_sleef_getPtr_vsx_f32
+
+ #endif
+
+ #define rempi nsimd_sleef_rempi_vsx
+ #define rempif nsimd_sleef_rempif_vsx
+ #define rempisub nsimd_sleef_rempisub_vsx
+ #define rempisubf nsimd_sleef_rempisubf_vsx
+ #define gammak nsimd_gammak_vsx
+ #define gammafk nsimd_gammafk_vsx
+
+ #endif
+
+
+
+#endif
+
diff --git a/src/sleefdp.c b/src/sleefdp.c
new file mode 100644
index 00000000..6912221b
--- /dev/null
+++ b/src/sleefdp.c
@@ -0,0 +1,2679 @@
+// Copyright Naoki Shibata and contributors 2010 - 2020.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+// Always use -ffp-contract=off option to compile SLEEF.
+
+#include <stdio.h>
+#include <assert.h>
+#include <stdint.h>
+#include <limits.h>
+#include <float.h>
+
+#ifndef ENABLE_BUILTIN_MATH
+#include <math.h>
+#define SQRT sqrt
+#else
+#define SQRT __builtin_sqrt
+#endif
+
+#include "misc.h"
+
+extern const double Sleef_rempitabdp[];
+
+#ifdef DORENAME
+#include "rename.h"
+#endif
+
+#if (defined(_MSC_VER))
+#pragma fp_contract (off)
+#endif
+
+#define MLA mla
+#define C2V(x) (x)
+#include "estrin.h"
+
+static INLINE CONST int64_t doubleToRawLongBits(double d) {
+  union {
+    double f;
+    int64_t i;
+  } tmp;
+  tmp.f = d;
+  return tmp.i;
+}
+
+static INLINE CONST double longBitsToDouble(int64_t i) {
+  union {
+    double f;
+    int64_t i;
+  } tmp;
+  tmp.i = i;
+  return tmp.f;
+}
+
+static INLINE CONST double fabsk(double x) {
+  return longBitsToDouble(INT64_C(0x7fffffffffffffff) & doubleToRawLongBits(x));
+}
+
+static INLINE CONST double mulsign(double x, double y) {
+  return longBitsToDouble(doubleToRawLongBits(x) ^ (doubleToRawLongBits(y) & (INT64_C(1) << 63)));
+}
+
+static INLINE CONST double copysignk(double x, double y) {
+  return longBitsToDouble((doubleToRawLongBits(x) & ~(INT64_C(1) << 63)) ^ (doubleToRawLongBits(y) & (INT64_C(1) << 63)));
+}
+
+static INLINE CONST double sign(double d) { return mulsign(1, d); }
+static INLINE CONST double mla(double x, double y, double z) { return x * y + z; }
+static INLINE CONST double rintk(double x) { return x < 0 ? (int)(x - 0.5) : (int)(x + 0.5); }
+static INLINE CONST int ceilk(double x) { return (int)x + (x < 0 ? 0 : 1); }
+static INLINE CONST double trunck(double x) { return (double)(int)x; }
+static INLINE CONST double fmink(double x, double y) { return x < y ? x : y; }
+static INLINE CONST double fmaxk(double x, double y) { return x > y ? x : y; }
+
+static INLINE CONST int xisnan(double x) { return x != x; }
+static INLINE CONST int xisinf(double x) { return x == SLEEF_INFINITY || x == -SLEEF_INFINITY; }
+static INLINE CONST int xisminf(double x) { return x == -SLEEF_INFINITY; }
+static INLINE CONST int xispinf(double x) { return x == SLEEF_INFINITY; }
+static INLINE CONST int xisnegzero(double x) { return doubleToRawLongBits(x) == doubleToRawLongBits(-0.0); }
+static INLINE CONST int xisnumber(double x) { return !xisinf(x) && !xisnan(x); }
+
+static INLINE CONST int xisint(double d) {
+  double x = d - (double)(INT64_C(1) << 31) * (int)(d * (1.0 / (INT64_C(1) << 31)));
+  return (x == (int)x) || (fabsk(d) >= (double)(INT64_C(1) << 53));
+}
+
+static INLINE CONST int xisodd(double d) {
+  double x = d - (double)(INT64_C(1) << 31) * (int)(d * (1.0 / (INT64_C(1) << 31)));
+  return (1 & (int)x) != 0 && fabsk(d) < (double)(INT64_C(1) << 53);
+}
+
+static INLINE CONST double pow2i(int q) {
+  return longBitsToDouble(((int64_t)(q + 0x3ff)) << 52);
+}
+
+static INLINE CONST double ldexpk(double x, int q) {
+  double u;
+  int m;
+  m = q >> 31;
+  m = (((m + q) >> 9) - m) << 7;
+  q = q - (m << 2);
+  m += 0x3ff;
+  m = m < 0 ? 0 : m;
+  m = m > 0x7ff ? 0x7ff : m;
+  u = longBitsToDouble(((int64_t)m) << 52);
+  x = x * u * u * u * u;
+  u = longBitsToDouble(((int64_t)(q + 0x3ff)) << 52);
+  return x * u;
+}
+
+static INLINE CONST double ldexp2k(double d, int e) { // faster than ldexpk, short reach
+  return d * pow2i(e >> 1) * pow2i(e - (e >> 1));
+}
+
+static INLINE CONST double ldexp3k(double d, int e) { // very fast, no denormal
+  return longBitsToDouble(doubleToRawLongBits(d) + (((int64_t)e) << 52));
+}
+
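+// A short worked example of the exponent helpers above (values chosen for
+// illustration): pow2i(3) writes the biased exponent 3 + 0x3ff = 0x402 into
+// bits 52..62, so longBitsToDouble(INT64_C(0x402) << 52) == 8.0 == 2^3.
+// ldexp3k(d, e) adds e straight onto d's stored exponent, which is why it
+// is only valid when neither the input nor the result is denormal.
+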
INT_MAX : e;
+  return e;
+}
+
+//
+
+#ifndef NDEBUG
+static int checkfp(double x) {
+  if (xisinf(x) || xisnan(x)) return 1;
+  return 0;
+}
+#endif
+
+static INLINE CONST double upper(double d) {
+  return longBitsToDouble(doubleToRawLongBits(d) & INT64_C(0xfffffffff8000000));
+}
+
+static INLINE CONST Sleef_double2 dd(double h, double l) {
+  Sleef_double2 ret;
+  ret.x = h; ret.y = l;
+  return ret;
+}
+
+static INLINE CONST Sleef_double2 ddnormalize_d2_d2(Sleef_double2 t) {
+  Sleef_double2 s;
+
+  s.x = t.x + t.y;
+  s.y = t.x - s.x + t.y;
+
+  return s;
+}
+
+static INLINE CONST Sleef_double2 ddscale_d2_d2_d(Sleef_double2 d, double s) {
+  Sleef_double2 r;
+
+  r.x = d.x * s;
+  r.y = d.y * s;
+
+  return r;
+}
+
+static INLINE CONST Sleef_double2 ddneg_d2_d2(Sleef_double2 d) {
+  Sleef_double2 r;
+
+  r.x = -d.x;
+  r.y = -d.y;
+
+  return r;
+}
+
+static INLINE CONST Sleef_double2 ddabs_d2_d2(Sleef_double2 x) {
+  return dd(x.x < 0 ? -x.x : x.x, x.x < 0 ? -x.y : x.y);
+}
+
+/*
+ * ddadd and ddadd2 are functions for double-double addition. ddadd
+ * is simpler and faster than ddadd2, but it requires the absolute
+ * value of its first argument to be larger than that of the second.
+ * The exact condition that has to be met is checked when the NDEBUG
+ * macro is not defined.
+ *
+ * Note that if the result is not going to be used, it is fine to
+ * feed arguments that do not meet this condition. You will then see
+ * warning messages if you build without the NDEBUG macro and run
+ * tester2, but this is expected.
+ *
+ * See:
+ * Jonathan Richard Shewchuk, Adaptive Precision Floating-Point
+ * Arithmetic and Fast Robust Geometric Predicates, Discrete &
+ * Computational Geometry 18:305-363, 1997.
+ */
+
+static INLINE CONST Sleef_double2 ddadd_d2_d_d(double x, double y) {
+  // |x| >= |y|
+
+  Sleef_double2 r;
+
+#ifndef NDEBUG
+  if (!(checkfp(x) || checkfp(y) || fabsk(x) >= fabsk(y) || (fabsk(x+y) <= fabsk(x) && fabsk(x+y) <= fabsk(y)))) {
+    fprintf(stderr, "[ddadd_d2_d_d : %g, %g]\n", x, y);
+    fflush(stderr);
+  }
+#endif
+
+  r.x = x + y;
+  r.y = x - r.x + y;
+
+  return r;
+}
+
+static INLINE CONST Sleef_double2 ddadd2_d2_d_d(double x, double y) {
+  Sleef_double2 r;
+
+  r.x = x + y;
+  double v = r.x - x;
+  r.y = (x - (r.x - v)) + (y - v);
+
+  return r;
+}
+
+static INLINE CONST Sleef_double2 ddadd_d2_d2_d(Sleef_double2 x, double y) {
+  // |x| >= |y|
+
+  Sleef_double2 r;
+
+#ifndef NDEBUG
+  if (!(checkfp(x.x) || checkfp(y) || fabsk(x.x) >= fabsk(y) || (fabsk(x.x+y) <= fabsk(x.x) && fabsk(x.x+y) <= fabsk(y)))) {
+    fprintf(stderr, "[ddadd_d2_d2_d : %g %g]\n", x.x, y);
+    fflush(stderr);
+  }
+#endif
+
+  r.x = x.x + y;
+  r.y = x.x - r.x + y + x.y;
+
+  return r;
+}
+
+static INLINE CONST Sleef_double2 ddadd2_d2_d2_d(Sleef_double2 x, double y) {
+  Sleef_double2 r;
+
+  r.x = x.x + y;
+  double v = r.x - x.x;
+  r.y = (x.x - (r.x - v)) + (y - v);
+  r.y += x.y;
+
+  return r;
+}
+
+static INLINE CONST Sleef_double2 ddadd_d2_d_d2(double x, Sleef_double2 y) {
+  // |x| >= |y|
+
+  Sleef_double2 r;
+
+#ifndef NDEBUG
+  if (!(checkfp(x) || checkfp(y.x) || fabsk(x) >= fabsk(y.x) || (fabsk(x+y.x) <= fabsk(x) && fabsk(x+y.x) <= fabsk(y.x)))) {
+    fprintf(stderr, "[ddadd_d2_d_d2 : %g %g]\n", x, y.x);
+    fflush(stderr);
+  }
+#endif
+
+  r.x = x + y.x;
+  r.y = x - r.x + y.x + y.y;
+
+  return r;
+}
+
+static INLINE CONST Sleef_double2 ddadd2_d2_d_d2(double x, Sleef_double2 y) {
+  Sleef_double2 r;
+
+  r.x = x + y.x;
+  double v = r.x - x;
+  r.y = (x - (r.x - v)) + (y.x - v) + y.y;
+
+  return r;
+}
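+
+// A sketch of the two error-free transformations used above (illustrative
+// only, assuming round-to-nearest IEEE-754 arithmetic; not upstream SLEEF
+// code). The ddadd* functions use Dekker's Fast2Sum, which requires
+// |x| >= |y|, while the ddadd2* functions use Knuth's 2Sum, which accepts
+// any ordering at the cost of three extra additions:
+//
+//   s = x + y; e = (x - s) + y;                 // Fast2Sum, |x| >= |y|
+//   s = x + y; v = s - x;
+//   e = (x - (s - v)) + (y - v);                // 2Sum, any x, y
+//
+// In both cases s + e equals x + y exactly; for example, with x = 1.0 and
+// y = 1e-20, s rounds to 1.0 and the error term e recovers the lost 1e-20.
+
+static INLINE CONST double ddadd2_d_d_d2(double 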
x, Sleef_double2 y) { return y.y + y.x + x; } + +static INLINE CONST Sleef_double2 ddadd_d2_d2_d2(Sleef_double2 x, Sleef_double2 y) { + // |x| >= |y| + + Sleef_double2 r; + +#ifndef NDEBUG + if (!(x.x == 0 || checkfp(x.x) || checkfp(y.x) || fabsk(x.x) >= fabsk(y.x) || (fabsk(x.x+y.x) <= fabsk(x.x) && fabsk(x.x+y.x) <= fabsk(y.x)))) { + fprintf(stderr, "[ddadd_d2_d2_d2 : %g %g]\n", x.x, y.x); + fflush(stderr); + } +#endif + + r.x = x.x + y.x; + r.y = x.x - r.x + y.x + x.y + y.y; + + return r; +} + +static INLINE CONST Sleef_double2 ddadd2_d2_d2_d2(Sleef_double2 x, Sleef_double2 y) { + Sleef_double2 r; + + r.x = x.x + y.x; + double v = r.x - x.x; + r.y = (x.x - (r.x - v)) + (y.x - v); + r.y += x.y + y.y; + + return r; +} + +static INLINE CONST Sleef_double2 ddsub_d2_d2_d2(Sleef_double2 x, Sleef_double2 y) { + // |x| >= |y| + + Sleef_double2 r; + +#ifndef NDEBUG + if (!(checkfp(x.x) || checkfp(y.x) || fabsk(x.x) >= fabsk(y.x) || (fabsk(x.x-y.x) <= fabsk(x.x) && fabsk(x.x-y.x) <= fabsk(y.x)))) { + fprintf(stderr, "[ddsub_d2_d2_d2 : %g %g]\n", x.x, y.x); + fflush(stderr); + } +#endif + + r.x = x.x - y.x; + r.y = x.x - r.x - y.x + x.y - y.y; + + return r; +} + +static INLINE CONST Sleef_double2 dddiv_d2_d2_d2(Sleef_double2 n, Sleef_double2 d) { + double t = 1.0 / d.x; + double dh = upper(d.x), dl = d.x - dh; + double th = upper(t ), tl = t - th; + double nhh = upper(n.x), nhl = n.x - nhh; + + Sleef_double2 q; + + q.x = n.x * t; + + double u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl + + q.x * (1 - dh * th - dh * tl - dl * th - dl * tl); + + q.y = t * (n.y - q.x * d.y) + u; + + return q; +} + +static INLINE CONST Sleef_double2 ddmul_d2_d_d(double x, double y) { + double xh = upper(x), xl = x - xh; + double yh = upper(y), yl = y - yh; + Sleef_double2 r; + + r.x = x * y; + r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl; + + return r; +} + +static INLINE CONST Sleef_double2 ddmul_d2_d2_d(Sleef_double2 x, double y) { + double xh = upper(x.x), xl = x.x - xh; + double yh = upper(y ), yl = y - yh; + Sleef_double2 r; + + r.x = x.x * y; + r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y; + + return r; +} + +static INLINE CONST Sleef_double2 ddmul_d2_d2_d2(Sleef_double2 x, Sleef_double2 y) { + double xh = upper(x.x), xl = x.x - xh; + double yh = upper(y.x), yl = y.x - yh; + Sleef_double2 r; + + r.x = x.x * y.x; + r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x; + + return r; +} + +static INLINE CONST double ddmul_d_d2_d2(Sleef_double2 x, Sleef_double2 y) { + double xh = upper(x.x), xl = x.x - xh; + double yh = upper(y.x), yl = y.x - yh; + + return x.y * yh + xh * y.y + xl * yl + xh * yl + xl * yh + xh * yh; +} + +static INLINE CONST Sleef_double2 ddsqu_d2_d2(Sleef_double2 x) { + double xh = upper(x.x), xl = x.x - xh; + Sleef_double2 r; + + r.x = x.x * x.x; + r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y); + + return r; +} + +static INLINE CONST double ddsqu_d_d2(Sleef_double2 x) { + double xh = upper(x.x), xl = x.x - xh; + + return xh * x.y + xh * x.y + xl * xl + (xh * xl + xh * xl) + xh * xh; +} + +static INLINE CONST Sleef_double2 ddrec_d2_d(double d) { + double t = 1.0 / d; + double dh = upper(d), dl = d - dh; + double th = upper(t), tl = t - th; + Sleef_double2 q; + + q.x = t; + q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl); + + return q; +} + +static INLINE CONST Sleef_double2 ddrec_d2_d2(Sleef_double2 d) { + double t = 1.0 / d.x; + double dh = upper(d.x), dl = d.x - dh; + double th = upper(t ), tl = t - th; + Sleef_double2 q; 
+ + q.x = t; + q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl - d.y * t); + + return q; +} + +static INLINE CONST Sleef_double2 ddsqrt_d2_d2(Sleef_double2 d) { + double t = SQRT(d.x + d.y); + return ddscale_d2_d2_d(ddmul_d2_d2_d2(ddadd2_d2_d2_d2(d, ddmul_d2_d_d(t, t)), ddrec_d2_d(t)), 0.5); +} + +static INLINE CONST Sleef_double2 ddsqrt_d2_d(double d) { + double t = SQRT(d); + return ddscale_d2_d2_d(ddmul_d2_d2_d2(ddadd2_d2_d_d2(d, ddmul_d2_d_d(t, t)), ddrec_d2_d(t)), 0.5); +} + +// + +static INLINE CONST double atan2k(double y, double x) { + double s, t, u; + int q = 0; + + if (x < 0) { x = -x; q = -2; } + if (y > x) { t = x; x = y; y = -t; q += 1; } + + s = y / x; + t = s * s; + + double t2 = t * t, t4 = t2 * t2, t8 = t4 * t4, t16 = t8 * t8; + u = POLY19(t, t2, t4, t8, t16, + -1.88796008463073496563746e-05, + 0.000209850076645816976906797, + -0.00110611831486672482563471, + 0.00370026744188713119232403, + -0.00889896195887655491740809, + 0.016599329773529201970117, + -0.0254517624932312641616861, + 0.0337852580001353069993897, + -0.0407629191276836500001934, + 0.0466667150077840625632675, + -0.0523674852303482457616113, + 0.0587666392926673580854313, + -0.0666573579361080525984562, + 0.0769219538311769618355029, + -0.090908995008245008229153, + 0.111111105648261418443745, + -0.14285714266771329383765, + 0.199999999996591265594148, + -0.333333333333311110369124); + + t = u * t * s + s; + t = q * (M_PI/2) + t; + + return t; +} + +EXPORT CONST double xatan2(double y, double x) { + double r = atan2k(fabsk(y), x); + + r = mulsign(r, x); + if (xisinf(x) || x == 0) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI /2)) : 0); + if (xisinf(y) ) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI*1/4)) : 0); + if ( y == 0) r = (sign(x) == -1 ? M_PI : 0); + + return xisnan(x) || xisnan(y) ? SLEEF_NAN : mulsign(r, y); +} + +EXPORT CONST double xasin(double d) { + int o = fabsk(d) < 0.5; + double x2 = o ? (d*d) : ((1-fabsk(d))*0.5), x = o ? fabsk(d) : SQRT(x2), u; + + double x4 = x2 * x2, x8 = x4 * x4, x16 = x8 * x8; + u = POLY12(x2, x4, x8, x16, + +0.3161587650653934628e-1, + -0.1581918243329996643e-1, + +0.1929045477267910674e-1, + +0.6606077476277170610e-2, + +0.1215360525577377331e-1, + +0.1388715184501609218e-1, + +0.1735956991223614604e-1, + +0.2237176181932048341e-1, + +0.3038195928038132237e-1, + +0.4464285681377102438e-1, + +0.7500000000378581611e-1, + +0.1666666666666497543e+0); + + u = mla(u, x * x2, x); + + double r = o ? u : (M_PI/2 - 2*u); + r = mulsign(r, d); + + return r; +} + +EXPORT CONST double xacos(double d) { + int o = fabsk(d) < 0.5; + double x2 = o ? (d*d) : ((1-fabsk(d))*0.5), u; + double x = o ? fabsk(d) : SQRT(x2); + x = fabsk(d) == 1.0 ? 0 : x; + + double x4 = x2 * x2, x8 = x4 * x4, x16 = x8 * x8; + u = POLY12(x2, x4, x8, x16, + +0.3161587650653934628e-1, + -0.1581918243329996643e-1, + +0.1929045477267910674e-1, + +0.6606077476277170610e-2, + +0.1215360525577377331e-1, + +0.1388715184501609218e-1, + +0.1735956991223614604e-1, + +0.2237176181932048341e-1, + +0.3038195928038132237e-1, + +0.4464285681377102438e-1, + +0.7500000000378581611e-1, + +0.1666666666666497543e+0); + + u *= x * x2; + + double y = 3.1415926535897932/2 - (mulsign(x, d) + mulsign(u, d)); + x += u; + double r = o ? 
y : (x*2); + if (!o && d < 0) r = ddadd_d2_d2_d(dd(3.141592653589793116, 1.2246467991473532072e-16), -r).x; + + return r; +} + +EXPORT CONST double xatan(double s) { + double t, u; + int q = 0; + + if (sign(s) == -1) { s = -s; q = 2; } + if (s > 1) { s = 1.0 / s; q |= 1; } + + t = s * s; + + double t2 = t * t, t4 = t2 * t2, t8 = t4 * t4, t16 = t8 * t8; + u = POLY19(t, t2, t4, t8, t16, + -1.88796008463073496563746e-05, + 0.000209850076645816976906797, + -0.00110611831486672482563471, + 0.00370026744188713119232403, + -0.00889896195887655491740809, + 0.016599329773529201970117, + -0.0254517624932312641616861, + 0.0337852580001353069993897, + -0.0407629191276836500001934, + 0.0466667150077840625632675, + -0.0523674852303482457616113, + 0.0587666392926673580854313, + -0.0666573579361080525984562, + 0.0769219538311769618355029, + -0.090908995008245008229153, + 0.111111105648261418443745, + -0.14285714266771329383765, + 0.199999999996591265594148, + -0.333333333333311110369124); + + t = s + s * (t * u); + + if ((q & 1) != 0) t = 1.570796326794896557998982 - t; + if ((q & 2) != 0) t = -t; + + return t; +} + +static Sleef_double2 atan2k_u1(Sleef_double2 y, Sleef_double2 x) { + double u; + Sleef_double2 s, t; + int q = 0; + + if (x.x < 0) { x.x = -x.x; x.y = -x.y; q = -2; } + if (y.x > x.x) { t = x; x = y; y.x = -t.x; y.y = -t.y; q += 1; } + + s = dddiv_d2_d2_d2(y, x); + t = ddsqu_d2_d2(s); + t = ddnormalize_d2_d2(t); + + double t2 = t.x * t.x, t4 = t2 * t2, t8 = t4 * t4, t16 = t8 * t8; + u = POLY16(t.x, t2, t4, t8, + 1.06298484191448746607415e-05, + -0.000125620649967286867384336, + 0.00070557664296393412389774, + -0.00251865614498713360352999, + 0.00646262899036991172313504, + -0.0128281333663399031014274, + 0.0208024799924145797902497, + -0.0289002344784740315686289, + 0.0359785005035104590853656, + -0.041848579703592507506027, + 0.0470843011653283988193763, + -0.0524914210588448421068719, + 0.0587946590969581003860434, + -0.0666620884778795497194182, + 0.0769225330296203768654095, + -0.0909090442773387574781907); + u = mla(u, t.x, 0.111111108376896236538123); + u = mla(u, t.x, -0.142857142756268568062339); + u = mla(u, t.x, 0.199999999997977351284817); + u = mla(u, t.x, -0.333333333333317605173818); + + t = ddadd_d2_d2_d2(s, ddmul_d2_d2_d(ddmul_d2_d2_d2(s, t), u)); + + if (fabsk(s.x) < 1e-200) t = s; + t = ddadd2_d2_d2_d2(ddmul_d2_d2_d(dd(1.570796326794896557998982, 6.12323399573676603586882e-17), q), t); + + return t; +} + +EXPORT CONST double xatan2_u1(double y, double x) { + if (fabsk(x) < 5.5626846462680083984e-309) { y *= (UINT64_C(1) << 53); x *= (UINT64_C(1) << 53); } // nexttoward((1.0 / DBL_MAX), 1) + Sleef_double2 d = atan2k_u1(dd(fabsk(y), 0), dd(x, 0)); + double r = d.x + d.y; + + r = mulsign(r, x); + if (xisinf(x) || x == 0) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI /2)) : 0); + if (xisinf(y) ) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI*1/4)) : 0); + if ( y == 0) r = (sign(x) == -1 ? M_PI : 0); + + return xisnan(x) || xisnan(y) ? SLEEF_NAN : mulsign(r, y); +} + +EXPORT CONST double xasin_u1(double d) { + int o = fabsk(d) < 0.5; + double x2 = o ? (d*d) : ((1-fabsk(d))*0.5), u; + Sleef_double2 x = o ? dd(fabsk(d), 0) : ddsqrt_d2_d(x2); + x = fabsk(d) == 1.0 ? 
dd(0, 0) : x; + + double x4 = x2 * x2, x8 = x4 * x4, x16 = x8 * x8; + u = POLY12(x2, x4, x8, x16, + +0.3161587650653934628e-1, + -0.1581918243329996643e-1, + +0.1929045477267910674e-1, + +0.6606077476277170610e-2, + +0.1215360525577377331e-1, + +0.1388715184501609218e-1, + +0.1735956991223614604e-1, + +0.2237176181932048341e-1, + +0.3038195928038132237e-1, + +0.4464285681377102438e-1, + +0.7500000000378581611e-1, + +0.1666666666666497543e+0); + + u *= x2 * x.x; + + Sleef_double2 y = ddadd_d2_d2_d(ddsub_d2_d2_d2(dd(3.141592653589793116/4, 1.2246467991473532072e-16/4), x), -u); + double r = o ? (u + x.x) : ((y.x + y.y)*2); + r = mulsign(r, d); + + return r; +} + +EXPORT CONST double xacos_u1(double d) { + int o = fabsk(d) < 0.5; + double x2 = o ? (d*d) : ((1-fabsk(d))*0.5), u; + Sleef_double2 x = o ? dd(fabsk(d), 0) : ddsqrt_d2_d(x2), w; + x = fabsk(d) == 1.0 ? dd(0, 0) : x; + + double x4 = x2 * x2, x8 = x4 * x4, x16 = x8 * x8; + u = POLY12(x2, x4, x8, x16, + +0.3161587650653934628e-1, + -0.1581918243329996643e-1, + +0.1929045477267910674e-1, + +0.6606077476277170610e-2, + +0.1215360525577377331e-1, + +0.1388715184501609218e-1, + +0.1735956991223614604e-1, + +0.2237176181932048341e-1, + +0.3038195928038132237e-1, + +0.4464285681377102438e-1, + +0.7500000000378581611e-1, + +0.1666666666666497543e+0); + + u *= x.x * x2; + + Sleef_double2 y = ddsub_d2_d2_d2(dd(3.141592653589793116/2, 1.2246467991473532072e-16/2), + ddadd_d2_d_d(mulsign(x.x, d), mulsign(u, d))); + x = ddadd_d2_d2_d(x, u); + y = o ? y : ddscale_d2_d2_d(x, 2); + if (!o && d < 0) y = ddsub_d2_d2_d2(dd(3.141592653589793116, 1.2246467991473532072e-16), y); + + return y.x + y.y; +} + +EXPORT CONST double xatan_u1(double d) { + Sleef_double2 d2 = atan2k_u1(dd(fabsk(d), 0), dd(1, 0)); + double r = d2.x + d2.y; + if (xisinf(d)) r = 1.570796326794896557998982; + return mulsign(r, d); +} + +typedef struct { + double d; + int32_t i; +} di_t; + +typedef struct { + Sleef_double2 dd; + int32_t i; +} ddi_t; + +static INLINE CONST double orsign(double x, double y) { + return longBitsToDouble(doubleToRawLongBits(x) | (doubleToRawLongBits(y) & (INT64_C(1) << 63))); +} + +static CONST di_t rempisub(double x) { + // This function is equivalent to : + // di_t ret = { x - rint(4 * x) * 0.25, (int32_t)(rint(4 * x) - rint(x) * 4) }; + di_t ret; + double c = mulsign(INT64_C(1) << 52, x); + double rint4x = fabsk(4*x) > INT64_C(1) << 52 ? (4*x) : orsign(mla(4, x, c) - c, x); + double rintx = fabsk( x) > INT64_C(1) << 52 ? x : orsign(x + c - c , x); + ret.d = mla(-0.25, rint4x, x); + ret.i = mla(-4 , rintx , rint4x); + return ret; +} + +// Payne-Hanek like argument reduction +static CONST ddi_t rempi(double a) { + Sleef_double2 x, y, z; + di_t di; + double t; + int ex = ilogb2k(a) - 55, q = ex > (700-55) ? -64 : 0; + a = ldexp3k(a, q); + if (ex < 0) ex = 0; + ex *= 4; + x = ddmul_d2_d_d(a, Sleef_rempitabdp[ex]); + di = rempisub(x.x); + q = di.i; + x.x = di.d; + x = ddnormalize_d2_d2(x); + y = ddmul_d2_d_d(a, Sleef_rempitabdp[ex+1]); + x = ddadd2_d2_d2_d2(x, y); + di = rempisub(x.x); + q += di.i; + x.x = di.d; + x = ddnormalize_d2_d2(x); + y = ddmul_d2_d2_d(dd(Sleef_rempitabdp[ex+2], Sleef_rempitabdp[ex+3]), a); + x = ddadd2_d2_d2_d2(x, y); + x = ddnormalize_d2_d2(x); + x = ddmul_d2_d2_d2(x, dd(3.141592653589793116*2, 1.2246467991473532072e-16*2)); + ddi_t ret = { fabsk(a) < 0.7 ? 
dd(a, 0) : x, q }; + return ret; +} + +EXPORT CONST double xsin(double d) { + double u, s, t = d; + int ql; + + if (fabsk(d) < TRIGRANGEMAX2) { + ql = rintk(d * M_1_PI); + d = mla(ql, -PI_A2, d); + d = mla(ql, -PI_B2, d); + } else if (fabsk(d) < TRIGRANGEMAX) { + double dqh = trunck(d * (M_1_PI / (1 << 24))) * (double)(1 << 24); + ql = rintk(mla(d, M_1_PI, -dqh)); + + d = mla(dqh, -PI_A, d); + d = mla( ql, -PI_A, d); + d = mla(dqh, -PI_B, d); + d = mla( ql, -PI_B, d); + d = mla(dqh, -PI_C, d); + d = mla( ql, -PI_C, d); + d = mla(dqh + ql, -PI_D, d); + } else { + ddi_t ddi = rempi(t); + ql = ((ddi.i & 3) * 2 + (ddi.dd.x > 0) + 1) >> 2; + if ((ddi.i & 1) != 0) { + ddi.dd = ddadd2_d2_d2_d2(ddi.dd, dd(mulsign(3.141592653589793116*-0.5, ddi.dd.x), + mulsign(1.2246467991473532072e-16*-0.5, ddi.dd.x))); + } + d = ddi.dd.x + ddi.dd.y; + if (xisinf(t) || xisnan(t)) d = SLEEF_NAN; + } + + s = d * d; + + if ((ql & 1) != 0) d = -d; + + double s2 = s * s, s4 = s2 * s2; + u = POLY8(s, s2, s4, + -7.97255955009037868891952e-18, + 2.81009972710863200091251e-15, + -7.64712219118158833288484e-13, + 1.60590430605664501629054e-10, + -2.50521083763502045810755e-08, + 2.75573192239198747630416e-06, + -0.000198412698412696162806809, + 0.00833333333333332974823815); + u = mla(u, s, -0.166666666666666657414808); + + u = mla(s, u * d, d); + + if (xisnegzero(t)) u = t; + + return u; +} + +EXPORT CONST double xsin_u1(double d) { + double u; + Sleef_double2 s, t, x; + int ql; + + if (fabsk(d) < TRIGRANGEMAX2) { + ql = rintk(d * M_1_PI); + u = mla(ql, -PI_A2, d); + s = ddadd_d2_d_d (u, ql * -PI_B2); + } else if (fabsk(d) < TRIGRANGEMAX) { + const double dqh = trunck(d * (M_1_PI / (1 << 24))) * (double)(1 << 24); + ql = rintk(mla(d, M_1_PI, -dqh)); + + u = mla(dqh, -PI_A, d); + s = ddadd_d2_d_d (u, ql * -PI_A); + s = ddadd2_d2_d2_d(s, dqh * -PI_B); + s = ddadd2_d2_d2_d(s, ql * -PI_B); + s = ddadd2_d2_d2_d(s, dqh * -PI_C); + s = ddadd2_d2_d2_d(s, ql * -PI_C); + s = ddadd_d2_d2_d (s, (dqh + ql) * -PI_D); + } else { + ddi_t ddi = rempi(d); + ql = ((ddi.i & 3) * 2 + (ddi.dd.x > 0) + 1) >> 2; + if ((ddi.i & 1) != 0) { + ddi.dd = ddadd2_d2_d2_d2(ddi.dd, dd(mulsign(3.141592653589793116*-0.5, ddi.dd.x), + mulsign(1.2246467991473532072e-16*-0.5, ddi.dd.x))); + } + s = ddnormalize_d2_d2(ddi.dd); + if (xisinf(d) || xisnan(d)) s.x = SLEEF_NAN; + } + + t = s; + s = ddsqu_d2_d2(s); + + double s2 = s.x * s.x, s4 = s2 * s2; + u = POLY6(s.x, s2, s4, + 2.72052416138529567917983e-15, + -7.6429259411395447190023e-13, + 1.60589370117277896211623e-10, + -2.5052106814843123359368e-08, + 2.75573192104428224777379e-06, + -0.000198412698412046454654947); + u = mla(u, s.x, 0.00833333333333318056201922); + + x = ddadd_d2_d_d2(1, ddmul_d2_d2_d2(ddadd_d2_d_d(-0.166666666666666657414808, u * s.x), s)); + u = ddmul_d_d2_d2(t, x); + + if ((ql & 1) != 0) u = -u; + if (xisnegzero(d)) u = d; + + return u; +} + +EXPORT CONST double xcos(double d) { + double u, s, t = d; + int ql; + + if (fabsk(d) < TRIGRANGEMAX2) { + ql = mla(2, rintk(d * M_1_PI - 0.5), 1); + d = mla(ql, -PI_A2*0.5, d); + d = mla(ql, -PI_B2*0.5, d); + } else if (fabsk(d) < TRIGRANGEMAX) { + double dqh = trunck(d * (M_1_PI / (INT64_C(1) << 23)) - 0.5 * (M_1_PI / (INT64_C(1) << 23))); + ql = 2*rintk(d * M_1_PI - 0.5 - dqh * (double)(INT64_C(1) << 23))+1; + dqh *= 1 << 24; + + d = mla(dqh, -PI_A*0.5, d); + d = mla( ql, -PI_A*0.5, d); + d = mla(dqh, -PI_B*0.5, d); + d = mla( ql, -PI_B*0.5, d); + d = mla(dqh, -PI_C*0.5, d); + d = mla( ql, -PI_C*0.5, d); + d = mla(dqh + ql , -PI_D*0.5, d); + } 
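+  // The quick two-step reductions above only hold up to TRIGRANGEMAX; for
+  // larger |d| fall through to the Payne-Hanek style reduction in rempi().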
else { + ddi_t ddi = rempi(t); + ql = ((ddi.i & 3) * 2 + (ddi.dd.x > 0) + 7) >> 1; + if ((ddi.i & 1) == 0) { + ddi.dd = ddadd2_d2_d2_d2(ddi.dd, dd(mulsign(3.141592653589793116*-0.5, ddi.dd.x > 0 ? 1 : -1), + mulsign(1.2246467991473532072e-16*-0.5, ddi.dd.x > 0 ? 1 : -1))); + } + d = ddi.dd.x + ddi.dd.y; + if (xisinf(t) || xisnan(t)) d = SLEEF_NAN; + } + + s = d * d; + + if ((ql & 2) == 0) d = -d; + + double s2 = s * s, s4 = s2 * s2; + u = POLY8(s, s2, s4, + -7.97255955009037868891952e-18, + 2.81009972710863200091251e-15, + -7.64712219118158833288484e-13, + 1.60590430605664501629054e-10, + -2.50521083763502045810755e-08, + 2.75573192239198747630416e-06, + -0.000198412698412696162806809, + 0.00833333333333332974823815); + u = mla(u, s, -0.166666666666666657414808); + + u = mla(s, u * d, d); + + return u; +} + +EXPORT CONST double xcos_u1(double d) { + double u; + Sleef_double2 s, t, x; + int ql; + + d = fabsk(d); + + if (d < TRIGRANGEMAX2) { + ql = mla(2, rintk(d * M_1_PI - 0.5), 1); + s = ddadd2_d2_d_d(d, ql * (-PI_A2*0.5)); + s = ddadd_d2_d2_d(s, ql * (-PI_B2*0.5)); + } else if (d < TRIGRANGEMAX) { + double dqh = trunck(d * (M_1_PI / (INT64_C(1) << 23)) - 0.5 * (M_1_PI / (INT64_C(1) << 23))); + ql = 2*rintk(d * M_1_PI - 0.5 - dqh * (double)(INT64_C(1) << 23))+1; + dqh *= 1 << 24; + + u = mla(dqh, -PI_A*0.5, d); + s = ddadd2_d2_d_d (u, ql * (-PI_A*0.5)); + s = ddadd2_d2_d2_d(s, dqh * (-PI_B*0.5)); + s = ddadd2_d2_d2_d(s, ql * (-PI_B*0.5)); + s = ddadd2_d2_d2_d(s, dqh * (-PI_C*0.5)); + s = ddadd2_d2_d2_d(s, ql * (-PI_C*0.5)); + s = ddadd_d2_d2_d(s, (dqh + ql) * (-PI_D*0.5)); + } else { + ddi_t ddi = rempi(d); + ql = ((ddi.i & 3) * 2 + (ddi.dd.x > 0) + 7) >> 1; + if ((ddi.i & 1) == 0) { + ddi.dd = ddadd2_d2_d2_d2(ddi.dd, dd(mulsign(3.141592653589793116*-0.5, ddi.dd.x > 0 ? 1 : -1), + mulsign(1.2246467991473532072e-16*-0.5, ddi.dd.x > 0 ? 
1 : -1))); + } + s = ddnormalize_d2_d2(ddi.dd); + if (xisinf(d) || xisnan(d)) s.x = SLEEF_NAN; + } + + t = s; + s = ddsqu_d2_d2(s); + + double s2 = s.x * s.x, s4 = s2 * s2; + u = POLY6(s.x, s2, s4, + 2.72052416138529567917983e-15, + -7.6429259411395447190023e-13, + 1.60589370117277896211623e-10, + -2.5052106814843123359368e-08, + 2.75573192104428224777379e-06, + -0.000198412698412046454654947); + u = mla(u, s.x, 0.00833333333333318056201922); + + x = ddadd_d2_d_d2(1, ddmul_d2_d2_d2(ddadd_d2_d_d(-0.166666666666666657414808, u * s.x), s)); + u = ddmul_d_d2_d2(t, x); + + if ((((int)ql) & 2) == 0) u = -u; + + return u; +} + +EXPORT CONST Sleef_double2 xsincos(double d) { + double u, s, t; + Sleef_double2 r; + int ql; + + s = d; + + if (fabsk(d) < TRIGRANGEMAX2) { + ql = rintk(s * (2 * M_1_PI)); + s = mla(ql, -PI_A2*0.5, s); + s = mla(ql, -PI_B2*0.5, s); + } else if (fabsk(d) < TRIGRANGEMAX) { + double dqh = trunck(d * ((2 * M_1_PI) / (1 << 24))) * (double)(1 << 24); + ql = rintk(d * (2 * M_1_PI) - dqh); + + s = mla(dqh, -PI_A * 0.5, s); + s = mla( ql, -PI_A * 0.5, s); + s = mla(dqh, -PI_B * 0.5, s); + s = mla( ql, -PI_B * 0.5, s); + s = mla(dqh, -PI_C * 0.5, s); + s = mla( ql, -PI_C * 0.5, s); + s = mla(dqh + ql, -PI_D * 0.5, s); + } else { + ddi_t ddi = rempi(d); + ql = ddi.i; + s = ddi.dd.x + ddi.dd.y; + if (xisinf(d) || xisnan(d)) s = SLEEF_NAN; + } + + t = s; + + s = s * s; + + u = 1.58938307283228937328511e-10; + u = mla(u, s, -2.50506943502539773349318e-08); + u = mla(u, s, 2.75573131776846360512547e-06); + u = mla(u, s, -0.000198412698278911770864914); + u = mla(u, s, 0.0083333333333191845961746); + u = mla(u, s, -0.166666666666666130709393); + u = u * s * t; + + r.x = t + u; + + if (xisnegzero(d)) r.x = -0.0; + + u = -1.13615350239097429531523e-11; + u = mla(u, s, 2.08757471207040055479366e-09); + u = mla(u, s, -2.75573144028847567498567e-07); + u = mla(u, s, 2.48015872890001867311915e-05); + u = mla(u, s, -0.00138888888888714019282329); + u = mla(u, s, 0.0416666666666665519592062); + u = mla(u, s, -0.5); + + r.y = u * s + 1; + + if ((ql & 1) != 0) { s = r.y; r.y = r.x; r.x = s; } + if ((ql & 2) != 0) { r.x = -r.x; } + if (((ql+1) & 2) != 0) { r.y = -r.y; } + + return r; +} + +EXPORT CONST Sleef_double2 xsincos_u1(double d) { + double u; + Sleef_double2 r, s, t, x; + int ql; + + if (fabsk(d) < TRIGRANGEMAX2) { + ql = rintk(d * (2 * M_1_PI)); + u = mla(ql, -PI_A2*0.5, d); + s = ddadd_d2_d_d (u, ql * (-PI_B2*0.5)); + } else if (fabsk(d) < TRIGRANGEMAX) { + const double dqh = trunck(d * ((2 * M_1_PI) / (1 << 24))) * (double)(1 << 24); + ql = rintk(d * (2 * M_1_PI) - dqh); + + u = mla(dqh, -PI_A*0.5, d); + s = ddadd_d2_d_d(u, ql * (-PI_A*0.5)); + s = ddadd2_d2_d2_d(s, dqh * (-PI_B*0.5)); + s = ddadd2_d2_d2_d(s, ql * (-PI_B*0.5)); + s = ddadd2_d2_d2_d(s, dqh * (-PI_C*0.5)); + s = ddadd2_d2_d2_d(s, ql * (-PI_C*0.5)); + s = ddadd_d2_d2_d(s, (dqh + ql) * (-PI_D*0.5)); + } else { + ddi_t ddi = rempi(d); + ql = ddi.i; + s = ddi.dd; + if (xisinf(d) || xisnan(d)) s = dd(SLEEF_NAN, SLEEF_NAN); + } + + t = s; + + s.x = ddsqu_d_d2(s); + + u = 1.58938307283228937328511e-10; + u = mla(u, s.x, -2.50506943502539773349318e-08); + u = mla(u, s.x, 2.75573131776846360512547e-06); + u = mla(u, s.x, -0.000198412698278911770864914); + u = mla(u, s.x, 0.0083333333333191845961746); + u = mla(u, s.x, -0.166666666666666130709393); + + u *= s.x * t.x; + + x = ddadd_d2_d2_d(t, u); + r.x = x.x + x.y; + + if (xisnegzero(d)) r.x = -0.0; + + u = -1.13615350239097429531523e-11; + u = mla(u, s.x, 
2.08757471207040055479366e-09); + u = mla(u, s.x, -2.75573144028847567498567e-07); + u = mla(u, s.x, 2.48015872890001867311915e-05); + u = mla(u, s.x, -0.00138888888888714019282329); + u = mla(u, s.x, 0.0416666666666665519592062); + u = mla(u, s.x, -0.5); + + x = ddadd_d2_d_d2(1, ddmul_d2_d_d(s.x, u)); + r.y = x.x + x.y; + + if ((ql & 1) != 0) { u = r.y; r.y = r.x; r.x = u; } + if ((ql & 2) != 0) { r.x = -r.x; } + if (((ql+1) & 2) != 0) { r.y = -r.y; } + + return r; +} + +EXPORT CONST Sleef_double2 xsincospi_u05(double d) { + double u, s, t; + Sleef_double2 r, x, s2; + + u = d * 4; + int q = ceilk(u) & ~(int)1; + + s = u - (double)q; + t = s; + s = s * s; + s2 = ddmul_d2_d_d(t, t); + + // + + u = -2.02461120785182399295868e-14; + u = mla(u, s, 6.94821830580179461327784e-12); + u = mla(u, s, -1.75724749952853179952664e-09); + u = mla(u, s, 3.13361688966868392878422e-07); + u = mla(u, s, -3.6576204182161551920361e-05); + u = mla(u, s, 0.00249039457019271850274356); + x = ddadd2_d2_d_d2(u * s, dd(-0.0807455121882807852484731, 3.61852475067037104849987e-18)); + x = ddadd2_d2_d2_d2(ddmul_d2_d2_d2(s2, x), dd(0.785398163397448278999491, 3.06287113727155002607105e-17)); + + x = ddmul_d2_d2_d(x, t); + r.x = x.x + x.y; + + if (xisnegzero(d)) r.x = -0.0; + + // + + u = 9.94480387626843774090208e-16; + u = mla(u, s, -3.89796226062932799164047e-13); + u = mla(u, s, 1.15011582539996035266901e-10); + u = mla(u, s, -2.4611369501044697495359e-08); + u = mla(u, s, 3.59086044859052754005062e-06); + u = mla(u, s, -0.000325991886927389905997954); + x = ddadd2_d2_d_d2(u * s, dd(0.0158543442438155018914259, -1.04693272280631521908845e-18)); + x = ddadd2_d2_d2_d2(ddmul_d2_d2_d2(s2, x), dd(-0.308425137534042437259529, -1.95698492133633550338345e-17)); + + x = ddadd2_d2_d2_d(ddmul_d2_d2_d2(x, s2), 1); + r.y = x.x + x.y; + + // + + if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } + if ((q & 4) != 0) { r.x = -r.x; } + if (((q+2) & 4) != 0) { r.y = -r.y; } + + if (fabsk(d) > TRIGRANGEMAX3/4) { r.x = 0; r.y = 1; } + if (xisinf(d)) { r.x = r.y = SLEEF_NAN; } + + return r; +} + +EXPORT CONST Sleef_double2 xsincospi_u35(double d) { + double u, s, t; + Sleef_double2 r; + + u = d * 4; + int q = ceilk(u) & ~(int)1; + + s = u - (double)q; + t = s; + s = s * s; + + // + + u = +0.6880638894766060136e-11; + u = mla(u, s, -0.1757159564542310199e-8); + u = mla(u, s, +0.3133616327257867311e-6); + u = mla(u, s, -0.3657620416388486452e-4); + u = mla(u, s, +0.2490394570189932103e-2); + u = mla(u, s, -0.8074551218828056320e-1); + u = mla(u, s, +0.7853981633974482790e+0); + + r.x = u * t; + + // + + u = -0.3860141213683794352e-12; + u = mla(u, s, +0.1150057888029681415e-9); + u = mla(u, s, -0.2461136493006663553e-7); + u = mla(u, s, +0.3590860446623516713e-5); + u = mla(u, s, -0.3259918869269435942e-3); + u = mla(u, s, +0.1585434424381541169e-1); + u = mla(u, s, -0.3084251375340424373e+0); + u = mla(u, s, 1); + + r.y = u; + + // + + if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } + if ((q & 4) != 0) { r.x = -r.x; } + if (((q+2) & 4) != 0) { r.y = -r.y; } + + if (fabsk(d) > TRIGRANGEMAX3/4) { r.x = 0; r.y = 1; } + if (xisinf(d)) { r.x = r.y = SLEEF_NAN; } + + return r; +} + +static INLINE CONST Sleef_double2 sinpik(double d) { + double u, s, t; + Sleef_double2 x, s2; + + u = d * 4; + int q = ceilk(u) & ~1; + int o = (q & 2) != 0; + + s = u - (double)q; + t = s; + s = s * s; + s2 = ddmul_d2_d_d(t, t); + + // + + u = o ? 9.94480387626843774090208e-16 : -2.02461120785182399295868e-14; + u = mla(u, s, o ? 
-3.89796226062932799164047e-13 : 6.94821830580179461327784e-12); + u = mla(u, s, o ? 1.15011582539996035266901e-10 : -1.75724749952853179952664e-09); + u = mla(u, s, o ? -2.4611369501044697495359e-08 : 3.13361688966868392878422e-07); + u = mla(u, s, o ? 3.59086044859052754005062e-06 : -3.6576204182161551920361e-05); + u = mla(u, s, o ? -0.000325991886927389905997954 : 0.00249039457019271850274356); + x = ddadd2_d2_d_d2(u * s, o ? dd(0.0158543442438155018914259, -1.04693272280631521908845e-18) : + dd(-0.0807455121882807852484731, 3.61852475067037104849987e-18)); + x = ddadd2_d2_d2_d2(ddmul_d2_d2_d2(s2, x), o ? dd(-0.308425137534042437259529, -1.95698492133633550338345e-17) : + dd(0.785398163397448278999491, 3.06287113727155002607105e-17)); + + x = ddmul_d2_d2_d2(x, o ? s2 : dd(t, 0)); + x = o ? ddadd2_d2_d2_d(x, 1) : x; + + // + + if ((q & 4) != 0) { x.x = -x.x; x.y = -x.y; } + + return x; +} + +EXPORT CONST double xsinpi_u05(double d) { + Sleef_double2 x = sinpik(d); + double r = x.x + x.y; + + if (xisnegzero(d)) r = -0.0; + if (fabsk(d) > TRIGRANGEMAX3/4) r = 0; + if (xisinf(d)) r = SLEEF_NAN; + + return r; +} + +static INLINE CONST Sleef_double2 cospik(double d) { + double u, s, t; + Sleef_double2 x, s2; + + u = d * 4; + int q = ceilk(u) & ~1; + int o = (q & 2) == 0; + + s = u - (double)q; + t = s; + s = s * s; + s2 = ddmul_d2_d_d(t, t); + + // + + u = o ? 9.94480387626843774090208e-16 : -2.02461120785182399295868e-14; + u = mla(u, s, o ? -3.89796226062932799164047e-13 : 6.94821830580179461327784e-12); + u = mla(u, s, o ? 1.15011582539996035266901e-10 : -1.75724749952853179952664e-09); + u = mla(u, s, o ? -2.4611369501044697495359e-08 : 3.13361688966868392878422e-07); + u = mla(u, s, o ? 3.59086044859052754005062e-06 : -3.6576204182161551920361e-05); + u = mla(u, s, o ? -0.000325991886927389905997954 : 0.00249039457019271850274356); + x = ddadd2_d2_d_d2(u * s, o ? dd(0.0158543442438155018914259, -1.04693272280631521908845e-18) : + dd(-0.0807455121882807852484731, 3.61852475067037104849987e-18)); + x = ddadd2_d2_d2_d2(ddmul_d2_d2_d2(s2, x), o ? dd(-0.308425137534042437259529, -1.95698492133633550338345e-17) : + dd(0.785398163397448278999491, 3.06287113727155002607105e-17)); + + x = ddmul_d2_d2_d2(x, o ? s2 : dd(t, 0)); + x = o ? 
ddadd2_d2_d2_d(x, 1) : x; + + // + + if (((q+2) & 4) != 0) { x.x = -x.x; x.y = -x.y; } + + return x; +} + +EXPORT CONST double xcospi_u05(double d) { + Sleef_double2 x = cospik(d); + double r = x.x + x.y; + + if (fabsk(d) > TRIGRANGEMAX3/4) r = 1; + if (xisinf(d)) r = SLEEF_NAN; + + return r; +} + +EXPORT CONST double xtan(double d) { + double u, s, x, y; + int ql; + + if (fabsk(d) < TRIGRANGEMAX2) { + ql = rintk(d * (2 * M_1_PI)); + x = mla(ql, -PI_A2*0.5, d); + x = mla(ql, -PI_B2*0.5, x); + } else if (fabsk(d) < 1e+6) { + double dqh = trunck(d * ((2 * M_1_PI) / (1 << 24))) * (double)(1 << 24); + ql = rintk(d * (2 * M_1_PI) - dqh); + + x = mla(dqh, -PI_A * 0.5, d); + x = mla( ql, -PI_A * 0.5, x); + x = mla(dqh, -PI_B * 0.5, x); + x = mla( ql, -PI_B * 0.5, x); + x = mla(dqh, -PI_C * 0.5, x); + x = mla( ql, -PI_C * 0.5, x); + x = mla(dqh + ql, -PI_D * 0.5, x); + } else { + ddi_t ddi = rempi(d); + ql = ddi.i; + x = ddi.dd.x + ddi.dd.y; + if (xisinf(d) || xisnan(d)) x = SLEEF_NAN; + } + + x *= 0.5; + s = x * x; + + double s2 = s * s, s4 = s2 * s2; + u = POLY8(s, s2, s4, + +0.3245098826639276316e-3, + +0.5619219738114323735e-3, + +0.1460781502402784494e-2, + +0.3591611540792499519e-2, + +0.8863268409563113126e-2, + +0.2186948728185535498e-1, + +0.5396825399517272970e-1, + +0.1333333333330500581e+0); + + u = mla(u, s, +0.3333333333333343695e+0); + u = mla(s, u * x, x); + + y = mla(u, u, -1); + x = -2 * u; + + if ((ql & 1) != 0) { double t = x; x = y; y = -t; } + + u = x / y; + + return u; +} + +EXPORT CONST double xtan_u1(double d) { + double u; + Sleef_double2 s, t, x, y; + int ql; + + if (fabsk(d) < TRIGRANGEMAX2) { + ql = rintk(d * (2 * M_1_PI)); + u = mla(ql, -PI_A2*0.5, d); + s = ddadd_d2_d_d(u, ql * (-PI_B2*0.5)); + } else if (fabsk(d) < TRIGRANGEMAX) { + const double dqh = trunck(d * (M_2_PI / (1 << 24))) * (double)(1 << 24); + s = ddadd2_d2_d2_d(ddmul_d2_d2_d(dd(M_2_PI_H, M_2_PI_L), d), (d < 0 ? 
-0.5 : 0.5) - dqh); + ql = s.x + s.y; + + u = mla(dqh, -PI_A*0.5, d); + s = ddadd_d2_d_d (u, ql * (-PI_A*0.5)); + s = ddadd2_d2_d2_d(s, dqh * (-PI_B*0.5)); + s = ddadd2_d2_d2_d(s, ql * (-PI_B*0.5)); + s = ddadd2_d2_d2_d(s, dqh * (-PI_C*0.5)); + s = ddadd2_d2_d2_d(s, ql * (-PI_C*0.5)); + s = ddadd_d2_d2_d(s, (dqh + ql) * (-PI_D*0.5)); + } else { + ddi_t ddi = rempi(d); + ql = ddi.i; + s = ddi.dd; + if (xisinf(d) || xisnan(d)) s.x = SLEEF_NAN; + } + + t = ddscale_d2_d2_d(s, 0.5); + s = ddsqu_d2_d2(t); + + double s2 = s.x * s.x, s4 = s2 * s2; + u = POLY8(s.x, s2, s4, + +0.3245098826639276316e-3, + +0.5619219738114323735e-3, + +0.1460781502402784494e-2, + +0.3591611540792499519e-2, + +0.8863268409563113126e-2, + +0.2186948728185535498e-1, + +0.5396825399517272970e-1, + +0.1333333333330500581e+0); + + u = mla(u, s.x, +0.3333333333333343695e+0); + x = ddadd_d2_d2_d2(t, ddmul_d2_d2_d(ddmul_d2_d2_d2(s, t), u)); + + y = ddadd_d2_d_d2(-1, ddsqu_d2_d2(x)); + x = ddscale_d2_d2_d(x, -2); + + if ((ql & 1) != 0) { t = x; x = y; y = ddneg_d2_d2(t); } + + x = dddiv_d2_d2_d2(x, y); + + u = x.x + x.y; + + if (xisnegzero(d)) u = d; + + return u; +} + +EXPORT CONST double xlog(double d) { + double x, x2, t, m; + int e; + + int o = d < DBL_MIN; + if (o) d *= (double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32); + + e = ilogb2k(d * (1.0/0.75)); + m = ldexp3k(d, -e); + + if (o) e -= 64; + + x = (m-1) / (m+1); + x2 = x * x; + + double x4 = x2 * x2, x8 = x4 * x4; + + t = POLY7(x2, x4, x8, + 0.153487338491425068243146, + 0.152519917006351951593857, + 0.181863266251982985677316, + 0.222221366518767365905163, + 0.285714294746548025383248, + 0.399999999950799600689777, + 0.6666666666667778740063); + + x = x * 2 + 0.693147180559945286226764 * e + x * x2 * t; + + if (xisinf(d)) x = SLEEF_INFINITY; + if (d < 0 || xisnan(d)) x = SLEEF_NAN; + if (d == 0) x = -SLEEF_INFINITY; + + return x; +} + +EXPORT CONST double xexp(double d) { + int q = (int)rintk(d * R_LN2); + double s, u; + + s = mla(q, -L2U, d); + s = mla(q, -L2L, s); + + double s2 = s * s, s4 = s2 * s2, s8 = s4 * s4; + u = POLY10(s, s2, s4, s8, + 2.08860621107283687536341e-09, + 2.51112930892876518610661e-08, + 2.75573911234900471893338e-07, + 2.75572362911928827629423e-06, + 2.4801587159235472998791e-05, + 0.000198412698960509205564975, + 0.00138888888889774492207962, + 0.00833333333331652721664984, + 0.0416666666666665047591422, + 0.166666666666666851703837); + u = mla(u, s, +0.5); + + u = s * s * u + s + 1; + u = ldexp2k(u, q); + + if (d > 709.78271114955742909217217426) u = SLEEF_INFINITY; + if (d < -1000) u = 0; + + return u; +} + +static INLINE CONST double expm1k(double d) { + int q = (int)rintk(d * R_LN2); + double s, u; + + s = mla(q, -L2U, d); + s = mla(q, -L2L, s); + + double s2 = s * s, s4 = s2 * s2, s8 = s4 * s4; + u = POLY10(s, s2, s4, s8, + 2.08860621107283687536341e-09, + 2.51112930892876518610661e-08, + 2.75573911234900471893338e-07, + 2.75572362911928827629423e-06, + 2.4801587159235472998791e-05, + 0.000198412698960509205564975, + 0.00138888888889774492207962, + 0.00833333333331652721664984, + 0.0416666666666665047591422, + 0.166666666666666851703837); + + u = mla(s2, 0.5, s2 * s * u) + s; + + if (q != 0) u = ldexp2k(u + 1, q) - 1; + + return u; +} + +static INLINE CONST Sleef_double2 logk(double d) { + Sleef_double2 x, x2, s; + double m, t; + int e; + + int o = d < DBL_MIN; + if (o) d *= (double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32); + + e = ilogb2k(d * (1.0/0.75)); + m = ldexp3k(d, -e); + + if (o) e -= 64; + + x = 
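+  // logk() evaluates log(d) as 2*atanh((m-1)/(m+1)) in double-double
+  // precision: with m reduced to [0.75, 1.5), x = (m-1)/(m+1) is small
+  // and the odd series below converges quickly.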
dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m)); + x2 = ddsqu_d2_d2(x); + + double x4 = x2.x * x2.x, x8 = x4 * x4, x16 = x8 * x8; + t = POLY9(x2.x, x4, x8, x16, + 0.116255524079935043668677, + 0.103239680901072952701192, + 0.117754809412463995466069, + 0.13332981086846273921509, + 0.153846227114512262845736, + 0.181818180850050775676507, + 0.222222222230083560345903, + 0.285714285714249172087875, + 0.400000000000000077715612); + + Sleef_double2 c = dd(0.666666666666666629659233, 3.80554962542412056336616e-17); + s = ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e); + s = ddadd_d2_d2_d2(s, ddscale_d2_d2_d(x, 2)); + x = ddmul_d2_d2_d2(x2, x); + s = ddadd_d2_d2_d2(s, ddmul_d2_d2_d2(x, c)); + x = ddmul_d2_d2_d2(x2, x); + s = ddadd_d2_d2_d2(s, ddmul_d2_d2_d(x, t)); + + return s; +} + +EXPORT CONST double xlog_u1(double d) { + Sleef_double2 x, s; + double m, t, x2; + int e; + + int o = d < DBL_MIN; + if (o) d *= (double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32); + + e = ilogb2k(d * (1.0/0.75)); + m = ldexp3k(d, -e); + + if (o) e -= 64; + + x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m)); + x2 = x.x * x.x; + + double x4 = x2 * x2, x8 = x4 * x4; + t = POLY7(x2, x4, x8, + 0.1532076988502701353e+0, + 0.1525629051003428716e+0, + 0.1818605932937785996e+0, + 0.2222214519839380009e+0, + 0.2857142932794299317e+0, + 0.3999999999635251990e+0, + 0.6666666666667333541e+0); + + s = ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), (double)e); + s = ddadd_d2_d2_d2(s, ddscale_d2_d2_d(x, 2)); + s = ddadd_d2_d2_d(s, x2 * x.x * t); + + double r = s.x + s.y; + + if (xisinf(d)) r = SLEEF_INFINITY; + if (d < 0 || xisnan(d)) r = SLEEF_NAN; + if (d == 0) r = -SLEEF_INFINITY; + + return r; +} + +static INLINE CONST double expk(Sleef_double2 d) { + int q = (int)rintk((d.x + d.y) * R_LN2); + Sleef_double2 s, t; + double u; + + s = ddadd2_d2_d2_d(d, q * -L2U); + s = ddadd2_d2_d2_d(s, q * -L2L); + + s = ddnormalize_d2_d2(s); + + double s2 = s.x * s.x, s4 = s2 * s2, s8 = s4 * s4; + u = POLY10(s.x, s2, s4, s8, + 2.51069683420950419527139e-08, + 2.76286166770270649116855e-07, + 2.75572496725023574143864e-06, + 2.48014973989819794114153e-05, + 0.000198412698809069797676111, + 0.0013888888939977128960529, + 0.00833333333332371417601081, + 0.0416666666665409524128449, + 0.166666666666666740681535, + 0.500000000000000999200722); + + t = ddadd_d2_d_d2(1, s); + t = ddadd_d2_d2_d2(t, ddmul_d2_d2_d(ddsqu_d2_d2(s), u)); + + u = ldexpk(t.x + t.y, q); + + if (d.x < -1000) u = 0; + + return u; +} + +EXPORT CONST double xpow(double x, double y) { + int yisint = xisint(y); + int yisodd = yisint && xisodd(y); + + Sleef_double2 d = ddmul_d2_d2_d(logk(fabsk(x)), y); + double result = expk(d); + if (d.x > 709.78271114955742909217217426) result = SLEEF_INFINITY; + + result = xisnan(result) ? SLEEF_INFINITY : result; + result *= (x > 0 ? 1 : (!yisint ? SLEEF_NAN : (yisodd ? -1 : 1))); + + double efx = mulsign(fabsk(x) - 1, y); + if (xisinf(y)) result = efx < 0 ? 0.0 : (efx == 0 ? 1.0 : SLEEF_INFINITY); + if (xisinf(x) || x == 0) result = (yisodd ? sign(x) : 1) * ((x == 0 ? -y : y) < 0 ? 
0 : SLEEF_INFINITY); + if (xisnan(x) || xisnan(y)) result = SLEEF_NAN; + if (y == 0 || x == 1) result = 1; + + return result; +} + +static INLINE CONST Sleef_double2 expk2(Sleef_double2 d) { + int q = (int)rintk((d.x + d.y) * R_LN2); + Sleef_double2 s, t; + double u; + + s = ddadd2_d2_d2_d(d, q * -L2U); + s = ddadd2_d2_d2_d(s, q * -L2L); + + u = +0.1602472219709932072e-9; + u = mla(u, s.x, +0.2092255183563157007e-8); + u = mla(u, s.x, +0.2505230023782644465e-7); + u = mla(u, s.x, +0.2755724800902135303e-6); + u = mla(u, s.x, +0.2755731892386044373e-5); + u = mla(u, s.x, +0.2480158735605815065e-4); + u = mla(u, s.x, +0.1984126984148071858e-3); + u = mla(u, s.x, +0.1388888888886763255e-2); + u = mla(u, s.x, +0.8333333333333347095e-2); + u = mla(u, s.x, +0.4166666666666669905e-1); + + t = ddadd2_d2_d2_d(ddmul_d2_d2_d(s, u), +0.1666666666666666574e+0); + t = ddadd2_d2_d2_d(ddmul_d2_d2_d2(s, t), 0.5); + t = ddadd2_d2_d2_d2(s, ddmul_d2_d2_d2(ddsqu_d2_d2(s), t)); + + t = ddadd2_d2_d_d2(1, t); + + t.x = ldexp2k(t.x, q); + t.y = ldexp2k(t.y, q); + + return d.x < -1000 ? dd(0, 0) : t; +} + +EXPORT CONST double xsinh(double x) { + double y = fabsk(x); + Sleef_double2 d = expk2(dd(y, 0)); + d = ddsub_d2_d2_d2(d, ddrec_d2_d2(d)); + y = (d.x + d.y) * 0.5; + + y = fabsk(x) > 710 ? SLEEF_INFINITY : y; + y = xisnan(y) ? SLEEF_INFINITY : y; + y = mulsign(y, x); + y = xisnan(x) ? SLEEF_NAN : y; + + return y; +} + +EXPORT CONST double xcosh(double x) { + double y = fabsk(x); + Sleef_double2 d = expk2(dd(y, 0)); + d = ddadd_d2_d2_d2(d, ddrec_d2_d2(d)); + y = (d.x + d.y) * 0.5; + + y = fabsk(x) > 710 ? SLEEF_INFINITY : y; + y = xisnan(y) ? SLEEF_INFINITY : y; + y = xisnan(x) ? SLEEF_NAN : y; + + return y; +} + +EXPORT CONST double xtanh(double x) { + double y = fabsk(x); + Sleef_double2 d = expk2(dd(y, 0)); + Sleef_double2 e = ddrec_d2_d2(d); + d = dddiv_d2_d2_d2(ddsub_d2_d2_d2(d, e), ddadd_d2_d2_d2(d, e)); + y = d.x + d.y; + + y = fabsk(x) > 18.714973875 ? 1.0 : y; + y = xisnan(y) ? 1.0 : y; + y = mulsign(y, x); + y = xisnan(x) ? SLEEF_NAN : y; + + return y; +} + +EXPORT CONST double xsinh_u35(double x) { + double e = expm1k(fabsk(x)); + double y = (e + 2) / (e + 1) * (0.5 * e); + + y = fabsk(x) > 709 ? SLEEF_INFINITY : y; + y = xisnan(y) ? SLEEF_INFINITY : y; + y = mulsign(y, x); + y = xisnan(x) ? SLEEF_NAN : y; + + return y; +} + +EXPORT CONST double xcosh_u35(double x) { + double e = xexp(fabsk(x)); + double y = 0.5 / e + 0.5 * e; + + y = fabsk(x) > 709 ? SLEEF_INFINITY : y; + y = xisnan(y) ? SLEEF_INFINITY : y; + y = xisnan(x) ? SLEEF_NAN : y; + + return y; +} + +EXPORT CONST double xtanh_u35(double x) { + double y = fabsk(x); + double d = expm1k(2*y); + y = d / (d + 2); + + y = fabsk(x) > 18.714973875 ? 1.0 : y; + y = xisnan(y) ? 1.0 : y; + y = mulsign(y, x); + y = xisnan(x) ? 
SLEEF_NAN : y; + + return y; +} + +static INLINE CONST Sleef_double2 logk2(Sleef_double2 d) { + Sleef_double2 x, x2, m, s; + double t; + int e; + + e = ilogbk(d.x * (1.0/0.75)); + + m.x = ldexp2k(d.x, -e); + m.y = ldexp2k(d.y, -e); + + x = dddiv_d2_d2_d2(ddadd2_d2_d2_d(m, -1), ddadd2_d2_d2_d(m, 1)); + x2 = ddsqu_d2_d2(x); + + double x4 = x2.x * x2.x, x8 = x4 * x4; + t = POLY7(x2.x, x4, x8, + 0.13860436390467167910856, + 0.131699838841615374240845, + 0.153914168346271945653214, + 0.181816523941564611721589, + 0.22222224632662035403996, + 0.285714285511134091777308, + 0.400000000000914013309483); + t = mla(t, x2.x, 0.666666666666664853302393); + + s = ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e); + s = ddadd_d2_d2_d2(s, ddscale_d2_d2_d(x, 2)); + s = ddadd_d2_d2_d2(s, ddmul_d2_d2_d(ddmul_d2_d2_d2(x2, x), t)); + + return s; +} + +EXPORT CONST double xasinh(double x) { + double y = fabsk(x); + Sleef_double2 d; + + d = y > 1 ? ddrec_d2_d(x) : dd(y, 0); + d = ddsqrt_d2_d2(ddadd2_d2_d2_d(ddsqu_d2_d2(d), 1)); + d = y > 1 ? ddmul_d2_d2_d(d, y) : d; + + d = logk2(ddnormalize_d2_d2(ddadd_d2_d2_d(d, x))); + y = d.x + d.y; + + y = (fabsk(x) > SQRT_DBL_MAX || xisnan(y)) ? mulsign(SLEEF_INFINITY, x) : y; + y = xisnan(x) ? SLEEF_NAN : y; + y = xisnegzero(x) ? -0.0 : y; + + return y; +} + +EXPORT CONST double xacosh(double x) { + Sleef_double2 d = logk2(ddadd2_d2_d2_d(ddmul_d2_d2_d2(ddsqrt_d2_d2(ddadd2_d2_d_d(x, 1)), ddsqrt_d2_d2(ddadd2_d2_d_d(x, -1))), x)); + double y = d.x + d.y; + + y = (x > SQRT_DBL_MAX || xisnan(y)) ? SLEEF_INFINITY : y; + y = x == 1.0 ? 0.0 : y; + y = x < 1.0 ? SLEEF_NAN : y; + y = xisnan(x) ? SLEEF_NAN : y; + + return y; +} + +EXPORT CONST double xatanh(double x) { + double y = fabsk(x); + Sleef_double2 d = logk2(dddiv_d2_d2_d2(ddadd2_d2_d_d(1, y), ddadd2_d2_d_d(1, -y))); + y = y > 1.0 ? SLEEF_NAN : (y == 1.0 ? SLEEF_INFINITY : (d.x + d.y) * 0.5); + + y = mulsign(y, x); + y = (xisinf(x) || xisnan(y)) ? SLEEF_NAN : y; + + return y; +} + +// + +EXPORT CONST double xcbrt(double d) { // max error : 2 ulps + double x, y, q = 1.0; + int e, r; + + e = ilogbk(fabsk(d))+1; + d = ldexp2k(d, -e); + r = (e + 6144) % 3; + q = (r == 1) ? 1.2599210498948731647672106 : q; + q = (r == 2) ? 1.5874010519681994747517056 : q; + q = ldexp2k(q, (e + 6144) / 3 - 2048); + + q = mulsign(q, d); + d = fabsk(d); + + x = -0.640245898480692909870982; + x = mla(x, d, 2.96155103020039511818595); + x = mla(x, d, -5.73353060922947843636166); + x = mla(x, d, 6.03990368989458747961407); + x = mla(x, d, -3.85841935510444988821632); + x = mla(x, d, 2.2307275302496609725722); + + y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0); + y = d * x * x; + y = (y - (2.0 / 3.0) * y * (y * x - 1)) * q; + + return y; +} + +EXPORT CONST double xcbrt_u1(double d) { + double x, y, z; + Sleef_double2 q2 = dd(1, 0), u, v; + int e, r; + + e = ilogbk(fabsk(d))+1; + d = ldexp2k(d, -e); + r = (e + 6144) % 3; + q2 = (r == 1) ? dd(1.2599210498948731907, -2.5899333753005069177e-17) : q2; + q2 = (r == 2) ? 
dd(1.5874010519681995834, -1.0869008194197822986e-16) : q2; + + q2.x = mulsign(q2.x, d); q2.y = mulsign(q2.y, d); + d = fabsk(d); + + x = -0.640245898480692909870982; + x = mla(x, d, 2.96155103020039511818595); + x = mla(x, d, -5.73353060922947843636166); + x = mla(x, d, 6.03990368989458747961407); + x = mla(x, d, -3.85841935510444988821632); + x = mla(x, d, 2.2307275302496609725722); + + y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0); + + z = x; + + u = ddmul_d2_d_d(x, x); + u = ddmul_d2_d2_d2(u, u); + u = ddmul_d2_d2_d(u, d); + u = ddadd2_d2_d2_d(u, -x); + y = u.x + u.y; + + y = -2.0 / 3.0 * y * z; + v = ddadd2_d2_d2_d(ddmul_d2_d_d(z, z), y); + v = ddmul_d2_d2_d(v, d); + v = ddmul_d2_d2_d2(v, q2); + z = ldexp2k(v.x + v.y, (e + 6144) / 3 - 2048); + + if (xisinf(d)) { z = mulsign(SLEEF_INFINITY, q2.x); } + if (d == 0) { z = mulsign(0, q2.x); } + + return z; +} + +EXPORT CONST double xexp2(double d) { + int q = (int)rintk(d); + double s, u; + + s = d - q; + + double s2 = s * s, s4 = s2 * s2, s8 = s4 * s4; + u = POLY10(s, s2, s4, s8, + +0.4434359082926529454e-9, + +0.7073164598085707425e-8, + +0.1017819260921760451e-6, + +0.1321543872511327615e-5, + +0.1525273353517584730e-4, + +0.1540353045101147808e-3, + +0.1333355814670499073e-2, + +0.9618129107597600536e-2, + +0.5550410866482046596e-1, + +0.2402265069591012214e+0); + u = mla(u, s, +0.6931471805599452862e+0); + + u = ddnormalize_d2_d2(ddadd_d2_d_d2(1, ddmul_d2_d_d(u, s))).x; + + u = ldexp2k(u, q); + + if (d >= 1024) u = SLEEF_INFINITY; + if (d < -2000) u = 0; + + return u; +} + +EXPORT CONST double xexp2_u35(double d) { + int q = (int)rintk(d); + double s, u; + + s = d - q; + + u = +0.4434359082926529454e-9; + u = mla(u, s, +0.7073164598085707425e-8); + u = mla(u, s, +0.1017819260921760451e-6); + u = mla(u, s, +0.1321543872511327615e-5); + u = mla(u, s, +0.1525273353517584730e-4); + u = mla(u, s, +0.1540353045101147808e-3); + u = mla(u, s, +0.1333355814670499073e-2); + u = mla(u, s, +0.9618129107597600536e-2); + u = mla(u, s, +0.5550410866482046596e-1); + u = mla(u, s, +0.2402265069591012214e+0); + u = mla(u, s, +0.6931471805599452862e+0); + u = mla(u, s, +0.1000000000000000000e+1); + + u = ldexp2k(u, q); + + if (d >= 1024) u = SLEEF_INFINITY; + if (d < -2000) u = 0; + + return u; +} + +EXPORT CONST double xexp10(double d) { + int q = (int)rintk(d * LOG10_2); + double s, u; + + s = mla(q, -L10U, d); + s = mla(q, -L10L, s); + + u = +0.2411463498334267652e-3; + u = mla(u, s, +0.1157488415217187375e-2); + u = mla(u, s, +0.5013975546789733659e-2); + u = mla(u, s, +0.1959762320720533080e-1); + u = mla(u, s, +0.6808936399446784138e-1); + u = mla(u, s, +0.2069958494722676234e+0); + u = mla(u, s, +0.5393829292058536229e+0); + u = mla(u, s, +0.1171255148908541655e+1); + u = mla(u, s, +0.2034678592293432953e+1); + u = mla(u, s, +0.2650949055239205876e+1); + u = mla(u, s, +0.2302585092994045901e+1); + + u = ddnormalize_d2_d2(ddadd_d2_d_d2(1, ddmul_d2_d_d(u, s))).x; + + u = ldexp2k(u, q); + + if (d > 308.25471555991671) u = SLEEF_INFINITY; // log10(DBL_MAX) + if (d < -350) u = 0; + + return u; +} + +EXPORT CONST double xexp10_u35(double d) { + int q = (int)rintk(d * LOG10_2); + double s, u; + + s = mla(q, -L10U, d); + s = mla(q, -L10L, s); + + u = +0.2411463498334267652e-3; + u = mla(u, s, +0.1157488415217187375e-2); + u = mla(u, s, +0.5013975546789733659e-2); + u = mla(u, s, +0.1959762320720533080e-1); + u = mla(u, s, +0.6808936399446784138e-1); + u = mla(u, s, +0.2069958494722676234e+0); + u = mla(u, s, +0.5393829292058536229e+0); + u = 
mla(u, s, +0.1171255148908541655e+1); + u = mla(u, s, +0.2034678592293432953e+1); + u = mla(u, s, +0.2650949055239205876e+1); + u = mla(u, s, +0.2302585092994045901e+1); + u = mla(u, s, +0.1000000000000000000e+1); + + u = ldexp2k(u, q); + + if (d > 308.25471555991671) u = SLEEF_INFINITY; + if (d < -350) u = 0; + + return u; +} + +EXPORT CONST double xexpm1(double a) { + Sleef_double2 d = ddadd2_d2_d2_d(expk2(dd(a, 0)), -1.0); + double x = d.x + d.y; + if (a > 709.782712893383996732223) x = SLEEF_INFINITY; // log(DBL_MAX) + if (a < -36.736800569677101399113302437) x = -1; // log(1 - nexttoward(1, 0)) + if (xisnegzero(a)) x = -0.0; + return x; +} + +EXPORT CONST double xlog10(double d) { + Sleef_double2 x, s; + double m, t, x2; + int e; + + int o = d < DBL_MIN; + if (o) d *= (double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32); + + e = ilogb2k(d * (1.0/0.75)); + m = ldexp3k(d, -e); + + if (o) e -= 64; + + x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m)); + x2 = x.x * x.x; + + double x4 = x2 * x2, x8 = x4 * x4; + t = POLY7(x2, x4, x8, + +0.6653725819576758460e-1, + +0.6625722782820833712e-1, + +0.7898105214313944078e-1, + +0.9650955035715275132e-1, + +0.1240841409721444993e+0, + +0.1737177927454605086e+0, + +0.2895296546021972617e+0); + + s = ddmul_d2_d2_d(dd(0.30102999566398119802, -2.803728127785170339e-18), (double)e); + s = ddadd_d2_d2_d2(s, ddmul_d2_d2_d2(x, dd(0.86858896380650363334, 1.1430059694096389311e-17))); + s = ddadd_d2_d2_d(s, x2 * x.x * t); + + double r = s.x + s.y; + + if (xisinf(d)) r = SLEEF_INFINITY; + if (d < 0 || xisnan(d)) r = SLEEF_NAN; + if (d == 0) r = -SLEEF_INFINITY; + + return r; +} + +EXPORT CONST double xlog2(double d) { + Sleef_double2 x, s; + double m, t, x2; + int e; + + int o = d < DBL_MIN; + if (o) d *= (double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32); + + e = ilogb2k(d * (1.0/0.75)); + m = ldexp3k(d, -e); + + if (o) e -= 64; + + x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m)); + x2 = x.x * x.x; + + double x4 = x2 * x2, x8 = x4 * x4; + t = POLY7(x2, x4, x8, + +0.2211941750456081490e+0, + +0.2200768693152277689e+0, + +0.2623708057488514656e+0, + +0.3205977477944495502e+0, + +0.4121985945485324709e+0, + +0.5770780162997058982e+0, + +0.96179669392608091449); + + s = ddadd2_d2_d_d2(e, ddmul_d2_d2_d2(x, dd(2.885390081777926774, 6.0561604995516736434e-18))); + s = ddadd2_d2_d2_d(s, x2 * x.x * t); + + double r = s.x + s.y; + + if (xisinf(d)) r = SLEEF_INFINITY; + if (d < 0 || xisnan(d)) r = SLEEF_NAN; + if (d == 0) r = -SLEEF_INFINITY; + + return r; +} + +EXPORT CONST double xlog2_u35(double d) { + double m, t, x, x2; + int e; + + int o = d < DBL_MIN; + if (o) d *= (double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32); + + e = ilogb2k(d * (1.0/0.75)); + m = ldexp3k(d, -e); + + if (o) e -= 64; + + x = (m - 1) / (m + 1); + x2 = x * x; + + t = +0.2211941750456081490e+0; + t = mla(t, x2, +0.2200768693152277689e+0); + t = mla(t, x2, +0.2623708057488514656e+0); + t = mla(t, x2, +0.3205977477944495502e+0); + t = mla(t, x2, +0.4121985945485324709e+0); + t = mla(t, x2, +0.5770780162997058982e+0); + t = mla(t, x2, +0.96179669392608091449 ); + + Sleef_double2 s = ddadd_d2_d_d2(e, ddmul_d2_d_d(2.885390081777926774, x)); + double r = mla(t, x * x2, s.x + s.y); + + if (xisinf(d)) r = SLEEF_INFINITY; + if (d < 0 || xisnan(d)) r = SLEEF_NAN; + if (d == 0) r = -SLEEF_INFINITY; + + return r; +} + +EXPORT CONST double xlog1p(double d) { + Sleef_double2 x, s; + double m, t, x2; + int e; + + double dp1 = d + 1; + + int o = dp1 < DBL_MIN; + if (o) 
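+    // dp1 is subnormal here: scale it up by 2^64 so that ilogb2k() and
+    // ldexp3k(), which assume normalized inputs, behave, then compensate
+    // with the e -= 64 below.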
dp1 *= (double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32); + + e = ilogb2k(dp1 * (1.0/0.75)); + + t = ldexp3k(1, -e); + m = mla(d, t, t - 1); + + if (o) e -= 64; + + x = dddiv_d2_d2_d2(dd(m, 0), ddadd_d2_d_d(2, m)); + x2 = x.x * x.x; + + double x4 = x2 * x2, x8 = x4 * x4; + t = POLY7(x2, x4, x8, + 0.1532076988502701353e+0, + 0.1525629051003428716e+0, + 0.1818605932937785996e+0, + 0.2222214519839380009e+0, + 0.2857142932794299317e+0, + 0.3999999999635251990e+0, + 0.6666666666667333541e+0); + + s = ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), (double)e); + s = ddadd_d2_d2_d2(s, ddscale_d2_d2_d(x, 2)); + s = ddadd_d2_d2_d(s, x2 * x.x * t); + + double r = s.x + s.y; + + if (d > 1e+307) r = SLEEF_INFINITY; + if (d < -1 || xisnan(d)) r = SLEEF_NAN; + if (d == -1) r = -SLEEF_INFINITY; + if (xisnegzero(d)) r = -0.0; + + return r; +} + +// + +EXPORT CONST double xfma(double x, double y, double z) { + double h2 = x * y + z, q = 1; + if (fabsk(h2) < 1e-300) { + const double c0 = UINT64_C(1) << 54, c1 = c0 * c0, c2 = c1 * c1; + x *= c1; + y *= c1; + z *= c2; + q = 1.0 / c2; + } + if (fabsk(h2) > 1e+299) { + const double c0 = UINT64_C(1) << 54, c1 = c0 * c0, c2 = c1 * c1; + x *= 1.0 / c1; + y *= 1.0 / c1; + z *= 1. / c2; + q = c2; + } + Sleef_double2 d = ddmul_d2_d_d(x, y); + d = ddadd2_d2_d2_d(d, z); + double ret = (x == 0 || y == 0) ? z : (d.x + d.y); + if ((xisinf(z) && !xisinf(x) && !xisnan(x) && !xisinf(y) && !xisnan(y))) h2 = z; + return (xisinf(h2) || xisnan(h2)) ? h2 : ret*q; +} + +EXPORT CONST double xsqrt_u05(double d) { + double q = 0.5; + + d = d < 0 ? SLEEF_NAN : d; + + if (d < 8.636168555094445E-78) { + d *= 1.157920892373162E77; + q = 2.9387358770557188E-39 * 0.5; + } + + if (d > 1.3407807929942597e+154) { + d *= 7.4583407312002070e-155; + q = 1.1579208923731620e+77 * 0.5; + } + + // http://en.wikipedia.org/wiki/Fast_inverse_square_root + double x = longBitsToDouble(0x5fe6ec85e7de30da - (doubleToRawLongBits(d + 1e-320) >> 1)); + + x = x * (1.5 - 0.5 * d * x * x); + x = x * (1.5 - 0.5 * d * x * x); + x = x * (1.5 - 0.5 * d * x * x) * d; + + Sleef_double2 d2 = ddmul_d2_d2_d2(ddadd2_d2_d_d2(d, ddmul_d2_d_d(x, x)), ddrec_d2_d(x)); + + double ret = (d2.x + d2.y) * q; + + ret = d == SLEEF_INFINITY ? SLEEF_INFINITY : ret; + ret = d == 0 ? d : ret; + + return ret; +} + +EXPORT CONST double xsqrt_u35(double d) { return xsqrt_u05(d); } +EXPORT CONST double xsqrt(double d) { return SQRT(d); } + +EXPORT CONST double xfabs(double x) { return fabsk(x); } + +EXPORT CONST double xcopysign(double x, double y) { return copysignk(x, y); } + +EXPORT CONST double xfmax(double x, double y) { + return y != y ? x : (x > y ? x : y); +} + +EXPORT CONST double xfmin(double x, double y) { + return y != y ? x : (x < y ? x : y); +} + +EXPORT CONST double xfdim(double x, double y) { + double ret = x - y; + if (ret < 0 || x == y) ret = 0; + return ret; +} + +EXPORT CONST double xtrunc(double x) { + double fr = x - (double)(INT64_C(1) << 31) * (int32_t)(x * (1.0 / (INT64_C(1) << 31))); + fr = fr - (int32_t)fr; + return (xisinf(x) || fabsk(x) >= (double)(INT64_C(1) << 52)) ? x : copysignk(x - fr, x); +} + +EXPORT CONST double xfloor(double x) { + double fr = x - (double)(INT64_C(1) << 31) * (int32_t)(x * (1.0 / (INT64_C(1) << 31))); + fr = fr - (int32_t)fr; + fr = fr < 0 ? fr+1.0 : fr; + return (xisinf(x) || fabsk(x) >= (double)(INT64_C(1) << 52)) ? 
x : copysignk(x - fr, x);
+}
+
+EXPORT CONST double xceil(double x) {
+  double fr = x - (double)(INT64_C(1) << 31) * (int32_t)(x * (1.0 / (INT64_C(1) << 31)));
+  fr = fr - (int32_t)fr;
+  fr = fr <= 0 ? fr : fr-1.0;
+  return (xisinf(x) || fabsk(x) >= (double)(INT64_C(1) << 52)) ? x : copysignk(x - fr, x);
+}
+
+EXPORT CONST double xround(double d) {
+  double x = d + 0.5;
+  double fr = x - (double)(INT64_C(1) << 31) * (int32_t)(x * (1.0 / (INT64_C(1) << 31)));
+  fr = fr - (int32_t)fr;
+  if (fr == 0 && x <= 0) x--;
+  fr = fr < 0 ? fr+1.0 : fr;
+  x = d == 0.49999999999999994449 ? 0 : x; // nextafter(0.5, 0)
+  return (xisinf(d) || fabsk(d) >= (double)(INT64_C(1) << 52)) ? d : copysignk(x - fr, d);
+}
+
+EXPORT CONST double xrint(double d) {
+  double c = mulsign(INT64_C(1) << 52, d);
+  return fabsk(d) > INT64_C(1) << 52 ? d : orsign(d + c - c, d);
+}
+
+EXPORT CONST double xhypot_u05(double x, double y) {
+  x = fabsk(x);
+  y = fabsk(y);
+  double min = fmink(x, y), n = min;
+  double max = fmaxk(x, y), d = max;
+
+  if (max < DBL_MIN) { n *= UINT64_C(1) << 54; d *= UINT64_C(1) << 54; }
+  Sleef_double2 t = dddiv_d2_d2_d2(dd(n, 0), dd(d, 0));
+  t = ddmul_d2_d2_d(ddsqrt_d2_d2(ddadd2_d2_d2_d(ddsqu_d2_d2(t), 1)), max);
+  double ret = t.x + t.y;
+  if (xisnan(ret)) ret = SLEEF_INFINITY;
+  if (min == 0) ret = max;
+  if (xisnan(x) || xisnan(y)) ret = SLEEF_NAN;
+  if (x == SLEEF_INFINITY || y == SLEEF_INFINITY) ret = SLEEF_INFINITY;
+  return ret;
+}
+
+EXPORT CONST double xhypot_u35(double x, double y) {
+  x = fabsk(x);
+  y = fabsk(y);
+  double min = fmink(x, y);
+  double max = fmaxk(x, y);
+
+  double t = min / max;
+  double ret = max * SQRT(1 + t*t);
+  if (min == 0) ret = max;
+  if (xisnan(x) || xisnan(y)) ret = SLEEF_NAN;
+  if (x == SLEEF_INFINITY || y == SLEEF_INFINITY) ret = SLEEF_INFINITY;
+  return ret;
+}
+
+EXPORT CONST double xnextafter(double x, double y) {
+  union {
+    double f;
+    int64_t i;
+  } cx;
+
+  x = x == 0 ? mulsign(0, y) : x;
+  cx.f = x;
+  int c = (cx.i < 0) == (y < x);
+  if (c) cx.i = -(cx.i ^ (UINT64_C(1) << 63));
+
+  if (x != y) cx.i--;
+
+  if (c) cx.i = -(cx.i ^ (UINT64_C(1) << 63));
+
+  if (cx.f == 0 && x != 0) cx.f = mulsign(0, x);
+  if (x == 0 && y == 0) cx.f = y;
+  if (xisnan(x) || xisnan(y)) cx.f = SLEEF_NAN;
+
+  return cx.f;
+}
+
+EXPORT CONST double xfrfrexp(double x) {
+  union {
+    double f;
+    uint64_t u;
+  } cx;
+
+  if (fabsk(x) < DBL_MIN) x *= (UINT64_C(1) << 63);
+
+  cx.f = x;
+  cx.u &= ~UINT64_C(0x7ff0000000000000);
+  cx.u |= UINT64_C(0x3fe0000000000000);
+
+  if (xisinf(x)) cx.f = mulsign(SLEEF_INFINITY, x);
+  if (x == 0) cx.f = x;
+
+  return cx.f;
+}
+
+EXPORT CONST int xexpfrexp(double x) {
+  union {
+    double f;
+    uint64_t u;
+  } cx;
+
+  int ret = 0;
+
+  if (fabsk(x) < DBL_MIN) { x *= (UINT64_C(1) << 63); ret = -63; }
+
+  cx.f = x;
+  ret += (int32_t)(((cx.u >> 52) & 0x7ff)) - 0x3fe;
+
+  if (x == 0 || xisnan(x) || xisinf(x)) ret = 0;
+
+  return ret;
+}
+
+static INLINE CONST double toward0(double d) {
+  return d == 0 ? 0 : longBitsToDouble(doubleToRawLongBits(d)-1);
+}
+
+static INLINE CONST double removelsb(double d) {
+  return longBitsToDouble(doubleToRawLongBits(d) & INT64_C(0xfffffffffffffffe));
+}
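+
+// toward0() and removelsb() above, together with ptrunc() below, prepare
+// quotient digits for the long-division style xfmod()/xremainder() that
+// follow: each pass peels off up to 52 quotient bits, and nudging values
+// toward zero plus clearing the quotient's last bit keeps q an exactly
+// representable underestimate, so the remainder update never overshoots.
+// A hedged outline of one xfmod pass (not the verbatim loop below):
+//
+//   q = removelsb(ptrunc(toward0(r) * toward0(1/d)));  // q <= r/d
+//   r = r - q*d;             // evaluated in double-double arithmetic
+
+static INLINE CONST double ptrunc(double x) {
+  double fr = mla(-(double)(INT64_C(1) << 31), (int32_t)(x * (1.0 / (INT64_C(1) << 31))), x);
+  return fabsk(x) >= (double)(INT64_C(1) << 52) ? 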
x : (x - (fr - (int32_t)fr)); +} + +EXPORT CONST double xfmod(double x, double y) { + double n = fabsk(x), d = fabsk(y), s = 1, q; + if (d < DBL_MIN) { n *= UINT64_C(1) << 54; d *= UINT64_C(1) << 54; s = 1.0 / (UINT64_C(1) << 54); } + Sleef_double2 r = dd(n, 0); + double rd = toward0(1.0 / d); + + for(int i=0;i < 21;i++) { // ceil(log2(DBL_MAX) / 52) + q = removelsb(ptrunc(toward0(r.x) * rd)); + q = (3*d > r.x && r.x > d) ? 2 : q; + q = (2*d > r.x && r.x > d) ? 1 : q; + q = r.x == d ? (r.y >= 0 ? 1 : 0) : q; + r = ddnormalize_d2_d2(ddadd2_d2_d2_d2(r, ddmul_d2_d_d(q, -d))); + if (r.x < d) break; + } + + double ret = r.x * s; + if (r.x + r.y == d) ret = 0; + ret = mulsign(ret, x); + if (n < d) ret = x; + if (d == 0) ret = SLEEF_NAN; + + return ret; +} + +static INLINE CONST double rintk2(double d) { + double c = mulsign(INT64_C(1) << 52, d); + return fabsk(d) > INT64_C(1) << 52 ? d : orsign(d + c - c, d); +} + +EXPORT CONST double xremainder(double x, double y) { + double n = fabsk(x), d = fabsk(y), s = 1, q; + if (d < DBL_MIN*2) { n *= UINT64_C(1) << 54; d *= UINT64_C(1) << 54; s = 1.0 / (UINT64_C(1) << 54); } + double rd = 1.0 / d; + Sleef_double2 r = dd(n, 0); + int qisodd = 0; + + for(int i=0;i < 21;i++) { // ceil(log2(DBL_MAX) / 52) + q = removelsb(rintk2(r.x * rd)); + if (fabsk(r.x) < 1.5 * d) q = r.x < 0 ? -1 : 1; + if (fabsk(r.x) < 0.5 * d || (fabsk(r.x) == 0.5 * d && !qisodd)) q = 0; + if (q == 0) break; + if (xisinf(q * -d)) q = q + mulsign(-1, r.x); + qisodd ^= xisodd(q); + r = ddnormalize_d2_d2(ddadd2_d2_d2_d2(r, ddmul_d2_d_d(q, -d))); + } + + double ret = r.x * s; + ret = mulsign(ret, x); + if (xisinf(y)) ret = xisinf(x) ? SLEEF_NAN : x; + if (d == 0) ret = SLEEF_NAN; + + return ret; +} + +EXPORT CONST Sleef_double2 xmodf(double x) { + double fr = x - (double)(INT64_C(1) << 31) * (int32_t)(x * (1.0 / (INT64_C(1) << 31))); + fr = fr - (int32_t)fr; + fr = fabsk(x) >= (double)(INT64_C(1) << 52) ? 0 : fr; + Sleef_double2 ret = { copysignk(fr, x), copysignk(x - fr, x) }; + return ret; +} + +typedef struct { + Sleef_double2 a, b; +} dd2; + +static CONST dd2 gammak(double a) { + Sleef_double2 clc = dd(0, 0), clln = dd(1, 0), clld = dd(1, 0), v = dd(1, 0), x, y, z; + double t, u; + + int otiny = fabsk(a) < 1e-306, oref = a < 0.5; + + x = otiny ? dd(0, 0) : (oref ? ddadd2_d2_d_d(1, -a) : dd(a, 0)); + + int o0 = (0.5 <= x.x && x.x <= 1.1), o2 = 2.3 < x.x; + + y = ddnormalize_d2_d2(ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, 1), x)); + y = ddnormalize_d2_d2(ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, 2), y)); + y = ddnormalize_d2_d2(ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, 3), y)); + y = ddnormalize_d2_d2(ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, 4), y)); + + clln = (o2 && x.x <= 7) ? y : clln; + + x = (o2 && x.x <= 7) ? ddadd2_d2_d2_d(x, 5) : x; + t = o2 ? (1.0 / x.x) : ddnormalize_d2_d2(ddadd2_d2_d2_d(x, o0 ? -1 : -2)).x; + + u = o2 ? -156.801412704022726379848862 : (o0 ? +0.2947916772827614196e+2 : +0.7074816000864609279e-7); + u = mla(u, t, o2 ? +1.120804464289911606838558160000 : (o0 ? +0.1281459691827820109e+3 : +0.4009244333008730443e-6)); + u = mla(u, t, o2 ? +13.39798545514258921833306020000 : (o0 ? +0.2617544025784515043e+3 : +0.1040114641628246946e-5)); + u = mla(u, t, o2 ? -0.116546276599463200848033357000 : (o0 ? +0.3287022855685790432e+3 : +0.1508349150733329167e-5)); + u = mla(u, t, o2 ? -1.391801093265337481495562410000 : (o0 ? +0.2818145867730348186e+3 : +0.1288143074933901020e-5)); + u = mla(u, t, o2 ? +0.015056113040026424412918973400 : (o0 ? 
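+/* In gammak(), every mla(u, t, c) is one Horner step, u = u*t + c; the
+   o2/o0 ternaries pick, per step, one of three coefficient sets that
+   share the same polynomial skeleton: an asymptotic (Stirling-type)
+   series in t = 1/x for x > 2.3, and two minimax sets expanded around
+   x = 1 and x = 2. An illustrative two-step Horner chain:
+
+     u = c2;
+     u = mla(u, t, c1);  // c2*t + c1
+     u = mla(u, t, c0);  // (c2*t + c1)*t + c0
+*/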
+0.1728670414673559605e+3 : +0.4744167749884993937e-6)); + u = mla(u, t, o2 ? +0.179540117061234856098844714000 : (o0 ? +0.7748735764030416817e+2 : -0.6554816306542489902e-7)); + u = mla(u, t, o2 ? -0.002481743600264997730942489280 : (o0 ? +0.2512856643080930752e+2 : -0.3189252471452599844e-6)); + u = mla(u, t, o2 ? -0.029527880945699120504851034100 : (o0 ? +0.5766792106140076868e+1 : +0.1358883821470355377e-6)); + u = mla(u, t, o2 ? +0.000540164767892604515196325186 : (o0 ? +0.7270275473996180571e+0 : -0.4343931277157336040e-6)); + u = mla(u, t, o2 ? +0.006403362833808069794787256200 : (o0 ? +0.8396709124579147809e-1 : +0.9724785897406779555e-6)); + u = mla(u, t, o2 ? -0.000162516262783915816896611252 : (o0 ? -0.8211558669746804595e-1 : -0.2036886057225966011e-5)); + u = mla(u, t, o2 ? -0.001914438498565477526465972390 : (o0 ? +0.6828831828341884458e-1 : +0.4373363141819725815e-5)); + u = mla(u, t, o2 ? +7.20489541602001055898311517e-05 : (o0 ? -0.7712481339961671511e-1 : -0.9439951268304008677e-5)); + u = mla(u, t, o2 ? +0.000839498720672087279971000786 : (o0 ? +0.8337492023017314957e-1 : +0.2050727030376389804e-4)); + u = mla(u, t, o2 ? -5.17179090826059219329394422e-05 : (o0 ? -0.9094964931456242518e-1 : -0.4492620183431184018e-4)); + u = mla(u, t, o2 ? -0.000592166437353693882857342347 : (o0 ? +0.1000996313575929358e+0 : +0.9945751236071875931e-4)); + u = mla(u, t, o2 ? +6.97281375836585777403743539e-05 : (o0 ? -0.1113342861544207724e+0 : -0.2231547599034983196e-3)); + u = mla(u, t, o2 ? +0.000784039221720066627493314301 : (o0 ? +0.1255096673213020875e+0 : +0.5096695247101967622e-3)); + u = mla(u, t, o2 ? -0.000229472093621399176949318732 : (o0 ? -0.1440498967843054368e+0 : -0.1192753911667886971e-2)); + u = mla(u, t, o2 ? -0.002681327160493827160473958490 : (o0 ? +0.1695571770041949811e+0 : +0.2890510330742210310e-2)); + u = mla(u, t, o2 ? +0.003472222222222222222175164840 : (o0 ? -0.2073855510284092762e+0 : -0.7385551028674461858e-2)); + u = mla(u, t, o2 ? +0.083333333333333333335592087900 : (o0 ? +0.2705808084277815939e+0 : +0.2058080842778455335e-1)); + + y = ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, -0.5), logk2(x)); + y = ddadd2_d2_d2_d2(y, ddneg_d2_d2(x)); + y = ddadd2_d2_d2_d2(y, dd(0.91893853320467278056, -3.8782941580672414498e-17)); // 0.5*log(2*M_PI) + + z = ddadd2_d2_d2_d(ddmul_d2_d_d (u, t), o0 ? -0.4006856343865314862e+0 : -0.6735230105319810201e-1); + z = ddadd2_d2_d2_d(ddmul_d2_d2_d(z, t), o0 ? +0.8224670334241132030e+0 : +0.3224670334241132030e+0); + z = ddadd2_d2_d2_d(ddmul_d2_d2_d(z, t), o0 ? -0.5772156649015328655e+0 : +0.4227843350984671345e+0); + z = ddmul_d2_d2_d(z, t); + + clc = o2 ? y : z; + + clld = o2 ? ddadd2_d2_d2_d(ddmul_d2_d_d(u, t), 1) : clld; + + y = clln; + + clc = otiny ? dd(83.1776616671934334590333, 3.67103459631568507221878e-15) : // log(2^120) + (oref ? ddadd2_d2_d2_d2(dd(1.1447298858494001639, 1.026595116270782638e-17), ddneg_d2_d2(clc)) : clc); // log(M_PI) + clln = otiny ? dd(1, 0) : (oref ? clln : clld); + + if (oref) x = ddmul_d2_d2_d2(clld, sinpik(a - (double)(INT64_C(1) << 28) * (int32_t)(a * (1.0 / (INT64_C(1) << 28))))); + + clld = otiny ? dd(a*((INT64_C(1) << 60)*(double)(INT64_C(1) << 60)), 0) : (oref ? x : y); + + dd2 ret = { clc, dddiv_d2_d2_d2(clln, clld) }; + + return ret; +} + +EXPORT CONST double xtgamma_u1(double a) { + dd2 d = gammak(a); + Sleef_double2 y = ddmul_d2_d2_d2(expk2(d.a), d.b); + double r = y.x + y.y; + r = (a == -SLEEF_INFINITY || (a < 0 && xisint(a)) || (xisnumber(a) && a < 0 && xisnan(r))) ? 
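+/* The condition above pins the poles of the gamma function: it is
+   undefined at -infinity and at the non-positive integers, and a finite
+   negative argument whose exp(lgamma) evaluation degenerated to NaN is
+   mapped to NaN as well. The next line then pins a == +-0, overflow
+   (roughly a > 200, where the result exceeds DBL_MAX) and gamma(+inf)
+   to a correctly signed infinity. */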
SLEEF_NAN : r; + r = ((a == SLEEF_INFINITY || xisnumber(a)) && a >= -DBL_MIN && (a == 0 || a > 200 || xisnan(r))) ? mulsign(SLEEF_INFINITY, a) : r; + return r; +} + +EXPORT CONST double xlgamma_u1(double a) { + dd2 d = gammak(a); + Sleef_double2 y = ddadd2_d2_d2_d2(d.a, logk2(ddabs_d2_d2(d.b))); + double r = y.x + y.y; + r = (xisinf(a) || (a <= 0 && xisint(a)) || (xisnumber(a) && xisnan(r))) ? SLEEF_INFINITY : r; + return r; +} + +EXPORT CONST double xerf_u1(double a) { + double s = a, t, u; + Sleef_double2 d; + + a = fabsk(a); + int o0 = a < 1.0, o1 = a < 3.7, o2 = a < 6.0; + u = o0 ? (a*a) : a; + + t = o0 ? +0.6801072401395392157e-20 : o1 ? +0.2830954522087717660e-13 : -0.5846750404269610493e-17; + t = mla(t, u, o0 ? -0.2161766247570056391e-18 : o1 ? -0.1509491946179481940e-11 : +0.6076691048812607898e-15); + t = mla(t, u, o0 ? +0.4695919173301598752e-17 : o1 ? +0.3827857177807173152e-10 : -0.3007518609604893831e-13); + t = mla(t, u, o0 ? -0.9049140419888010819e-16 : o1 ? -0.6139733921558987241e-09 : +0.9427906260824646063e-12); + t = mla(t, u, o0 ? +0.1634018903557411517e-14 : o1 ? +0.6985387934608038824e-08 : -0.2100110908269393629e-10); + t = mla(t, u, o0 ? -0.2783485786333455216e-13 : o1 ? -0.5988224513034371474e-07 : +0.3534639523461223473e-09); + t = mla(t, u, o0 ? +0.4463221276786412722e-12 : o1 ? +0.4005716952355346640e-06 : -0.4664967728285395926e-08); + t = mla(t, u, o0 ? -0.6711366622850138987e-11 : o1 ? -0.2132190104575784400e-05 : +0.4943823283769000532e-07); + t = mla(t, u, o0 ? +0.9422759050232658346e-10 : o1 ? +0.9092461304042630325e-05 : -0.4271203394761148254e-06); + t = mla(t, u, o0 ? -0.1229055530100228477e-08 : o1 ? -0.3079188080966205457e-04 : +0.3034067677404915895e-05); + t = mla(t, u, o0 ? +0.1480719281585085023e-07 : o1 ? +0.7971413443082370762e-04 : -0.1776295289066871135e-04); + t = mla(t, u, o0 ? -0.1636584469123402714e-06 : o1 ? -0.1387853215225442864e-03 : +0.8524547630559505050e-04); + t = mla(t, u, o0 ? +0.1646211436588923363e-05 : o1 ? +0.6469678026257590965e-04 : -0.3290582944961784398e-03); + t = mla(t, u, o0 ? -0.1492565035840624866e-04 : o1 ? +0.4996645280372945860e-03 : +0.9696966068789101157e-03); + t = mla(t, u, o0 ? +0.1205533298178966496e-03 : o1 ? -0.1622802482842520535e-02 : -0.1812527628046986137e-02); + t = mla(t, u, o0 ? -0.8548327023450851166e-03 : o1 ? +0.1615320557049377171e-03 : -0.4725409828123619017e-03); + t = mla(t, u, o0 ? +0.5223977625442188799e-02 : o1 ? +0.1915262325574875607e-01 : +0.2090315427924229266e-01); + t = mla(t, u, o0 ? -0.2686617064513125569e-01 : o1 ? -0.1027818298486033455e+00 : -0.1052041921842776645e+00); + t = mla(t, u, o0 ? +0.1128379167095512753e+00 : o1 ? -0.6366172819842503827e+00 : -0.6345351808766568347e+00); + t = mla(t, u, o0 ? -0.3761263890318375380e+00 : o1 ? -0.1128379590648910469e+01 : -0.1129442929103524396e+01); + d = ddmul_d2_d_d(t, u); + d = ddadd2_d2_d2_d2(d, o0 ? dd(1.1283791670955125586, 1.5335459613165822674e-17) : + o1 ? dd(3.4110644736196137587e-08, -2.4875650708323294246e-24) : + dd(0.00024963035690526438285, -5.4362665034856259795e-21)); + d = o0 ? ddmul_d2_d2_d(d, a) : ddadd_d2_d_d2(1.0, ddneg_d2_d2(expk2(d))); + u = mulsign(o2 ? (d.x + d.y) : 1, s); + u = xisnan(a) ? SLEEF_NAN : u; + return u; +} + +EXPORT CONST double xerfc_u15(double a) { + double s = a, r = 0, t; + Sleef_double2 u, d, x; + a = fabsk(a); + int o0 = a < 1.0, o1 = a < 2.2, o2 = a < 4.2, o3 = a < 27.3; + u = o0 ? ddmul_d2_d_d(a, a) : o1 ? dd(a, 0) : dddiv_d2_d2_d2(dd(1, 0), dd(a, 0)); + + t = o0 ? 
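+/* The o0/o1/o2 flags split |a| at 1.0, 2.2 and 4.2, and every mla line
+   below carries one coefficient per range, so a single polynomial
+   skeleton serves four intervals; u was chosen accordingly above (a*a,
+   a, or 1/a). Past the o3 bound of 27.3, erfc underflows, so r stays 0
+   (folded to 2 - r for negative arguments). */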
+0.6801072401395386139e-20 : o1 ? +0.3438010341362585303e-12 : o2 ? -0.5757819536420710449e+2 : +0.2334249729638701319e+5; + t = mla(t, u.x, o0 ? -0.2161766247570055669e-18 : o1 ? -0.1237021188160598264e-10 : o2 ? +0.4669289654498104483e+3 : -0.4695661044933107769e+5); + t = mla(t, u.x, o0 ? +0.4695919173301595670e-17 : o1 ? +0.2117985839877627852e-09 : o2 ? -0.1796329879461355858e+4 : +0.3173403108748643353e+5); + t = mla(t, u.x, o0 ? -0.9049140419888007122e-16 : o1 ? -0.2290560929177369506e-08 : o2 ? +0.4355892193699575728e+4 : +0.3242982786959573787e+4); + t = mla(t, u.x, o0 ? +0.1634018903557410728e-14 : o1 ? +0.1748931621698149538e-07 : o2 ? -0.7456258884965764992e+4 : -0.2014717999760347811e+5); + t = mla(t, u.x, o0 ? -0.2783485786333451745e-13 : o1 ? -0.9956602606623249195e-07 : o2 ? +0.9553977358167021521e+4 : +0.1554006970967118286e+5); + t = mla(t, u.x, o0 ? +0.4463221276786415752e-12 : o1 ? +0.4330010240640327080e-06 : o2 ? -0.9470019905444229153e+4 : -0.6150874190563554293e+4); + t = mla(t, u.x, o0 ? -0.6711366622850136563e-11 : o1 ? -0.1435050600991763331e-05 : o2 ? +0.7387344321849855078e+4 : +0.1240047765634815732e+4); + t = mla(t, u.x, o0 ? +0.9422759050232662223e-10 : o1 ? +0.3460139479650695662e-05 : o2 ? -0.4557713054166382790e+4 : -0.8210325475752699731e+2); + t = mla(t, u.x, o0 ? -0.1229055530100229098e-08 : o1 ? -0.4988908180632898173e-05 : o2 ? +0.2207866967354055305e+4 : +0.3242443880839930870e+2); + t = mla(t, u.x, o0 ? +0.1480719281585086512e-07 : o1 ? -0.1308775976326352012e-05 : o2 ? -0.8217975658621754746e+3 : -0.2923418863833160586e+2); + t = mla(t, u.x, o0 ? -0.1636584469123399803e-06 : o1 ? +0.2825086540850310103e-04 : o2 ? +0.2268659483507917400e+3 : +0.3457461732814383071e+0); + t = mla(t, u.x, o0 ? +0.1646211436588923575e-05 : o1 ? -0.6393913713069986071e-04 : o2 ? -0.4633361260318560682e+2 : +0.5489730155952392998e+1); + t = mla(t, u.x, o0 ? -0.1492565035840623511e-04 : o1 ? -0.2566436514695078926e-04 : o2 ? +0.9557380123733945965e+1 : +0.1559934132251294134e-2); + t = mla(t, u.x, o0 ? +0.1205533298178967851e-03 : o1 ? +0.5895792375659440364e-03 : o2 ? -0.2958429331939661289e+1 : -0.1541741566831520638e+1); + t = mla(t, u.x, o0 ? -0.8548327023450850081e-03 : o1 ? -0.1695715579163588598e-02 : o2 ? +0.1670329508092765480e+0 : +0.2823152230558364186e-5); + t = mla(t, u.x, o0 ? +0.5223977625442187932e-02 : o1 ? +0.2089116434918055149e-03 : o2 ? +0.6096615680115419211e+0 : +0.6249999184195342838e+0); + t = mla(t, u.x, o0 ? -0.2686617064513125222e-01 : o1 ? +0.1912855949584917753e-01 : o2 ? +0.1059212443193543585e-2 : +0.1741749416408701288e-8); + + d = ddmul_d2_d2_d(u, t); + d = ddadd2_d2_d2_d2(d, o0 ? dd(0.11283791670955126141, -4.0175691625932118483e-18) : + o1 ? dd(-0.10277263343147646779, -6.2338714083404900225e-18) : + o2 ? dd(-0.50005180473999022439, 2.6362140569041995803e-17) : + dd(-0.5000000000258444377, -4.0074044712386992281e-17)); + d = ddmul_d2_d2_d2(d, u); + d = ddadd2_d2_d2_d2(d, o0 ? dd(-0.37612638903183753802, 1.3391897206042552387e-17) : + o1 ? dd(-0.63661976742916359662, 7.6321019159085724662e-18) : + o2 ? dd(1.601106273924963368e-06, 1.1974001857764476775e-23) : + dd(2.3761973137523364792e-13, -1.1670076950531026582e-29)); + d = ddmul_d2_d2_d2(d, u); + d = ddadd2_d2_d2_d2(d, o0 ? dd(1.1283791670955125586, 1.5335459613165822674e-17) : + o1 ? dd(-1.1283791674717296161, 8.0896847755965377194e-17) : + o2 ? 
dd(-0.57236496645145429341, 3.0704553245872027258e-17) :
+                 dd(-0.57236494292470108114, -2.3984352208056898003e-17));
+
+  x = ddmul_d2_d2_d(o1 ? d : dd(-a, 0), a);
+  x = o1 ? x : ddadd2_d2_d2_d2(x, d);
+  x = o0 ? ddsub_d2_d2_d2(dd(1, 0), x) : expk2(x);
+  x = o1 ? x : ddmul_d2_d2_d2(x, u);
+
+  r = o3 ? (x.x + x.y) : 0;
+  if (s < 0) r = 2 - r;
+  r = xisnan(s) ? SLEEF_NAN : r;
+  return r;
+}
+
+#ifdef ENABLE_MAIN
+// gcc -w -DENABLE_MAIN -I../common sleefdp.c rempitab.c -lm
+#include <stdlib.h>
+int main(int argc, char **argv) {
+  double d1 = atof(argv[1]);
+  printf("arg1 = %.20g\n", d1);
+  //int i1 = atoi(argv[1]);
+  //double d2 = atof(argv[2]);
+  //printf("arg2 = %.20g\n", d2);
+  //printf("%d\n", (int)d2);
+#if 0
+  double d3 = atof(argv[3]);
+  printf("arg3 = %.20g\n", d3);
+#endif
+  //printf("%g\n", pow2i(i1));
+  //int exp = xexpfrexp(d1);
+  //double r = xnextafter(d1, d2);
+  //double r = xfma(d1, d2, d3);
+  printf("test = %.20g\n", xcos_u1(d1));
+  //printf("test = %.20g\n", xlog(d1));
+  //r = nextafter(d1, d2);
+  printf("corr = %.20g\n", cos(d1));
+  //printf("%.20g %.20g\n", xround(d1), xrint(d1));
+  //Sleef_double2 r = xsincospi_u35(d);
+  //printf("%g, %g\n", (double)r.x, (double)r.y);
+}
+#endif
diff --git a/src/sleefsimddp.c b/src/sleefsimddp.c
new file mode 100644
index 00000000..99fc67c6
--- /dev/null
+++ b/src/sleefsimddp.c
@@ -0,0 +1,3820 @@
+// Copyright Naoki Shibata and contributors 2010 - 2020.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+// Always use -ffp-contract=off option to compile SLEEF.
+
+#if !defined(SLEEF_GENHEADER)
+#include <stdint.h>
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+#endif
+
+#include "misc.h"
+
+extern const double Sleef_rempitabdp[];
+
+#define __SLEEFSIMDDP_C__
+
+#if (defined(_MSC_VER))
+#pragma fp_contract (off)
+#endif
+
+// Intel
+
+#ifdef ENABLE_SSE2
+#define CONFIG 2
+#include "helpersse2.h"
+#ifdef DORENAME
+#ifdef ENABLE_GNUABI
+#include "renamesse2_gnuabi.h"
+#else
+#include "renamesse2.h"
+#endif
+#endif
+#endif
+
+#ifdef ENABLE_SSE4
+#define CONFIG 4
+#include "helpersse2.h"
+#ifdef DORENAME
+#include "renamesse4.h"
+#endif
+#endif
+
+#ifdef ENABLE_AVX
+#define CONFIG 1
+#include "helperavx.h"
+#ifdef DORENAME
+#ifdef ENABLE_GNUABI
+#include "renameavx_gnuabi.h"
+#else
+#include "renameavx.h"
+#endif
+#endif
+#endif
+
+#ifdef ENABLE_FMA4
+#define CONFIG 4
+#include "helperavx.h"
+#ifdef DORENAME
+#ifdef ENABLE_GNUABI
+#include "renamefma4_gnuabi.h"
+#else
+#include "renamefma4.h"
+#endif
+#endif
+#endif
+
+#ifdef ENABLE_AVX2
+#define CONFIG 1
+#include "helperavx2.h"
+#ifdef DORENAME
+#ifdef ENABLE_GNUABI
+#include "renameavx2_gnuabi.h"
+#else
+#include "renameavx2.h"
+#endif
+#endif
+#endif
+
+#ifdef ENABLE_AVX2128
+#define CONFIG 1
+#include "helperavx2_128.h"
+#ifdef DORENAME
+#include "renameavx2128.h"
+#endif
+#endif
+
+#ifdef ENABLE_AVX512F
+#define CONFIG 1
+#include "helperavx512f.h"
+#ifdef DORENAME
+#ifdef ENABLE_GNUABI
+#include "renameavx512f_gnuabi.h"
+#else
+#include "renameavx512f.h"
+#endif
+#endif
+#endif
+
+#ifdef ENABLE_AVX512FNOFMA
+#define CONFIG 2
+#include "helperavx512f.h"
+#ifdef DORENAME
+#include "renameavx512fnofma.h"
+#endif
+#endif
+
+// Arm
+
+#ifdef ENABLE_ADVSIMD
+#define CONFIG 1
+#include "helperadvsimd.h"
+#ifdef DORENAME
+#ifdef ENABLE_GNUABI
+#include "renameadvsimd_gnuabi.h"
+#else
+#include "renameadvsimd.h"
+#endif
+#endif
+#endif
+
+#ifdef ENABLE_ADVSIMDNOFMA
+#define CONFIG 2
+#include "helperadvsimd.h"
+#ifdef DORENAME
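+// Each block above pairs a helper header (the per-target vector
+// abstraction layer) with a CONFIG value that selects the FMA or
+// non-FMA code path inside it, plus a rename header that maps the
+// generic x* entry points onto exported, extension-qualified names.
+// Schematically, a rename header is a list of defines along the lines
+// of (illustrative, see the actual rename*.h files):
+//
+//   #define xsin    Sleef_sind2_u35sse2
+//   #define xsin_u1 Sleef_sind2_u10sse2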
+#include "renameadvsimdnofma.h" +#endif +#endif + +#ifdef ENABLE_SVE +#define CONFIG 1 +#include "helpersve.h" +#ifdef DORENAME +#ifdef ENABLE_GNUABI +#include "renamesve_gnuabi.h" +#else +#include "renamesve.h" +#endif /* ENABLE_GNUABI */ +#endif /* DORENAME */ +#endif /* ENABLE_SVE */ + +#ifdef ENABLE_SVENOFMA +#define CONFIG 2 +#include "helpersve.h" +#ifdef DORENAME +#include "renamesvenofma.h" +#endif /* DORENAME */ +#endif /* ENABLE_SVE */ + +// IBM + +#ifdef ENABLE_VSX +#define CONFIG 1 +#include "helperpower_128.h" +#ifdef DORENAME +#include "renamevsx.h" +#endif +#endif + +#ifdef ENABLE_VSXNOFMA +#define CONFIG 2 +#include "helperpower_128.h" +#ifdef DORENAME +#include "renamevsxnofma.h" +#endif +#endif + +#ifdef ENABLE_ZVECTOR2 +#define CONFIG 140 +#include "helpers390x_128.h" +#ifdef DORENAME +#include "renamezvector2.h" +#endif +#endif + +#ifdef ENABLE_ZVECTOR2NOFMA +#define CONFIG 141 +#include "helpers390x_128.h" +#ifdef DORENAME +#include "renamezvector2nofma.h" +#endif +#endif + +// Generic + +#ifdef ENABLE_VECEXT +#define CONFIG 1 +#include "helpervecext.h" +#ifdef DORENAME +#include "renamevecext.h" +#endif +#endif + +#ifdef ENABLE_PUREC +#define CONFIG 1 +#include "helperpurec.h" +#ifdef DORENAME +#include "renamepurec.h" +#endif +#endif + +#ifdef ENABLE_PUREC_SCALAR +#define CONFIG 1 +#include "helperpurec_scalar.h" +#ifdef DORENAME +#include "renamepurec_scalar.h" +#endif +#endif + +#ifdef ENABLE_PURECFMA_SCALAR +#define CONFIG 2 +#include "helperpurec_scalar.h" +#ifdef DORENAME +#include "renamepurecfma_scalar.h" +#endif +#endif + +// + +#define MLA(x, y, z) vmla_vd_vd_vd_vd((x), (y), (z)) +#define C2V(c) vcast_vd_d(c) +#include "estrin.h" + +// + +#include "dd.h" + +// + +static INLINE VECTOR_CC vopmask vnot_vo64_vo64(vopmask x) { + return vxor_vo_vo_vo(x, veq64_vo_vm_vm(vcast_vm_i_i(0, 0), vcast_vm_i_i(0, 0))); +} + +static INLINE CONST VECTOR_CC vopmask vsignbit_vo_vd(vdouble d) { + return veq64_vo_vm_vm(vand_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vcast_vd_d(-0.0))); +} + +// return d0 < d1 ? x : y +static INLINE CONST VECTOR_CC vint vsel_vi_vd_vd_vi_vi(vdouble d0, vdouble d1, vint x, vint y) { return vsel_vi_vo_vi_vi(vcast_vo32_vo64(vlt_vo_vd_vd(d0, d1)), x, y); } + +// return d0 < 0 ? 
x : 0 +static INLINE CONST VECTOR_CC vint vsel_vi_vd_vi(vdouble d, vint x) { return vand_vi_vo_vi(vcast_vo32_vo64(vsignbit_vo_vd(d)), x); } + +static INLINE CONST VECTOR_CC vopmask visnegzero_vo_vd(vdouble d) { + return veq64_vo_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(vcast_vd_d(-0.0))); +} + +static INLINE CONST VECTOR_CC vopmask visnumber_vo_vd(vdouble x) { + return vandnot_vo_vo_vo(visinf_vo_vd(x), veq_vo_vd_vd(x, x)); +} + +static INLINE CONST VECTOR_CC vmask vsignbit_vm_vd(vdouble d) { + return vand_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(vcast_vd_d(-0.0))); +} + +static INLINE CONST VECTOR_CC vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) { + return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(x), vsignbit_vm_vd(y))); +} + +static INLINE CONST VECTOR_CC vdouble vcopysign_vd_vd_vd(vdouble x, vdouble y) { + return vreinterpret_vd_vm(vxor_vm_vm_vm(vandnot_vm_vm_vm(vreinterpret_vm_vd(vcast_vd_d(-0.0)), vreinterpret_vm_vd(x)), + vand_vm_vm_vm (vreinterpret_vm_vd(vcast_vd_d(-0.0)), vreinterpret_vm_vd(y)))); +} + +static INLINE CONST VECTOR_CC vdouble vsign_vd_vd(vdouble d) { + return vmulsign_vd_vd_vd(vcast_vd_d(1.0), d); +} + +static INLINE CONST VECTOR_CC vdouble vpow2i_vd_vi(vint q) { + q = vadd_vi_vi_vi(vcast_vi_i(0x3ff), q); + vint2 r = vcastu_vi2_vi(q); + return vreinterpret_vd_vi2(vsll_vi2_vi2_i(r, 20)); +} + +static INLINE CONST VECTOR_CC vdouble vldexp_vd_vd_vi(vdouble x, vint q) { + vint m = vsra_vi_vi_i(q, 31); + m = vsll_vi_vi_i(vsub_vi_vi_vi(vsra_vi_vi_i(vadd_vi_vi_vi(m, q), 9), m), 7); + q = vsub_vi_vi_vi(q, vsll_vi_vi_i(m, 2)); + m = vadd_vi_vi_vi(vcast_vi_i(0x3ff), m); + m = vandnot_vi_vo_vi(vgt_vo_vi_vi(vcast_vi_i(0), m), m); + m = vsel_vi_vo_vi_vi(vgt_vo_vi_vi(m, vcast_vi_i(0x7ff)), vcast_vi_i(0x7ff), m); + vint2 r = vcastu_vi2_vi(m); + vdouble y = vreinterpret_vd_vi2(vsll_vi2_vi2_i(r, 20)); + return vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(x, y), y), y), y), vpow2i_vd_vi(q)); +} + +static INLINE CONST VECTOR_CC vdouble vldexp2_vd_vd_vi(vdouble d, vint e) { + return vmul_vd_vd_vd(vmul_vd_vd_vd(d, vpow2i_vd_vi(vsra_vi_vi_i(e, 1))), vpow2i_vd_vi(vsub_vi_vi_vi(e, vsra_vi_vi_i(e, 1)))); +} + +static INLINE CONST VECTOR_CC vdouble vldexp3_vd_vd_vi(vdouble d, vint q) { + return vreinterpret_vd_vi2(vadd_vi2_vi2_vi2(vreinterpret_vi2_vd(d), vsll_vi2_vi2_i(vcastu_vi2_vi(q), 20))); +} + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) +static INLINE CONST VECTOR_CC vint vilogbk_vi_vd(vdouble d) { + vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(4.9090934652977266E-91)); + d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(vcast_vd_d(2.037035976334486E90), d), d); + vint q = vcastu_vi_vi2(vreinterpret_vi2_vd(d)); + q = vand_vi_vi_vi(q, vcast_vi_i(((1 << 12)-1) << 20)); + q = vsrl_vi_vi_i(q, 20); + q = vsub_vi_vi_vi(q, vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vcast_vi_i(300 + 0x3ff), vcast_vi_i(0x3ff))); + return q; +} + +static INLINE CONST VECTOR_CC vint vilogb2k_vi_vd(vdouble d) { + vint q = vcastu_vi_vi2(vreinterpret_vi2_vd(d)); + q = vsrl_vi_vi_i(q, 20); + q = vand_vi_vi_vi(q, vcast_vi_i(0x7ff)); + q = vsub_vi_vi_vi(q, vcast_vi_i(0x3ff)); + return q; +} +#endif + +static INLINE CONST VECTOR_CC vopmask visint_vo_vd(vdouble d) { +#ifdef FULL_FP_ROUNDING + return veq_vo_vd_vd(vtruncate_vd_vd(d), d); +#else + vdouble x = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0 / (INT64_C(1) << 31)))); + x = vmla_vd_vd_vd_vd(vcast_vd_d(-(double)(INT64_C(1) << 31)), x, d); + return vor_vo_vo_vo(veq_vo_vd_vd(vtruncate_vd_vd(x), x), + 
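+/* Without FULL_FP_ROUNDING, integrality is tested in two stages: d is
+   reduced modulo 2^31 so the remainder fits an int32 truncation, the
+   remainder is compared with its truncation, and the case |d| >= 2^53,
+   where every representable double is an integer, is ORed in. A scalar
+   rendering of the same test (illustrative only):
+
+     int isint(double d) {
+       double x = d - 2147483648.0 * (int32_t)(d * (1.0 / 2147483648.0));
+       return ((int32_t)x == x) || (fabs(d) >= 9007199254740992.0); // 2^53
+     }
+*/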
vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(INT64_C(1) << 53))); +#endif +} + +static INLINE CONST VECTOR_CC vopmask visodd_vo_vd(vdouble d) { +#ifdef FULL_FP_ROUNDING + vdouble x = vmul_vd_vd_vd(d, vcast_vd_d(0.5)); + return vneq_vo_vd_vd(vtruncate_vd_vd(x), x); +#else + vdouble x = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0 / (INT64_C(1) << 31)))); + x = vmla_vd_vd_vd_vd(vcast_vd_d(-(double)(INT64_C(1) << 31)), x, d); + + return vand_vo_vo_vo(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vtruncate_vi_vd(x), vcast_vi_i(1)), vcast_vi_i(1))), + vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(INT64_C(1) << 53))); +#endif +} + +// + +EXPORT CONST VECTOR_CC vdouble xldexp(vdouble x, vint q) { return vldexp_vd_vd_vi(x, q); } + +EXPORT CONST VECTOR_CC vint xilogb(vdouble d) { + vdouble e = vcast_vd_vi(vilogbk_vi_vd(vabs_vd_vd(d))); + e = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(SLEEF_FP_ILOGB0), e); + e = vsel_vd_vo_vd_vd(visnan_vo_vd(d), vcast_vd_d(SLEEF_FP_ILOGBNAN), e); + e = vsel_vd_vo_vd_vd(visinf_vo_vd(d), vcast_vd_d(INT_MAX), e); + return vrint_vi_vd(e); +} + +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +typedef struct { + vdouble d; + vint i; +} di_t; + +static vdouble digetd_vd_di(di_t d) { return d.d; } +static vint digeti_vi_di(di_t d) { return d.i; } +static di_t disetdi_di_vd_vi(vdouble d, vint i) { + di_t r = { d, i }; + return r; +} + +typedef struct { + vdouble2 dd; + vint i; +} ddi_t; + +static vdouble2 ddigetdd_vd2_ddi(ddi_t d) { return d.dd; } +static vint ddigeti_vi_ddi(ddi_t d) { return d.i; } +static ddi_t ddisetddi_ddi_vd2_vi(vdouble2 v, vint i) { + ddi_t r = { v, i }; + return r; +} +static ddi_t ddisetdd_ddi_ddi_vd2(ddi_t ddi, vdouble2 v) { + ddi.dd = v; + return ddi; +} +#endif + +static INLINE CONST VECTOR_CC vdouble vorsign_vd_vd_vd(vdouble x, vdouble y) { + return vreinterpret_vd_vm(vor_vm_vm_vm(vreinterpret_vm_vd(x), vsignbit_vm_vd(y))); +} + +static INLINE CONST di_t rempisub(vdouble x) { +#ifdef FULL_FP_ROUNDING + vdouble y = vrint_vd_vd(vmul_vd_vd_vd(x, vcast_vd_d(4))); + vint vi = vtruncate_vi_vd(vsub_vd_vd_vd(y, vmul_vd_vd_vd(vrint_vd_vd(x), vcast_vd_d(4)))); + return disetdi_di_vd_vi(vsub_vd_vd_vd(x, vmul_vd_vd_vd(y, vcast_vd_d(0.25))), vi); +#else + vdouble c = vmulsign_vd_vd_vd(vcast_vd_d(INT64_C(1) << 52), x); + vdouble rint4x = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vabs_vd_vd(vmul_vd_vd_vd(vcast_vd_d(4), x)), vcast_vd_d(INT64_C(1) << 52)), + vmul_vd_vd_vd(vcast_vd_d(4), x), + vorsign_vd_vd_vd(vsub_vd_vd_vd(vmla_vd_vd_vd_vd(vcast_vd_d(4), x, c), c), x)); + vdouble rintx = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(INT64_C(1) << 52)), + x, vorsign_vd_vd_vd(vsub_vd_vd_vd(vadd_vd_vd_vd(x, c), c), x)); + return disetdi_di_vd_vi(vmla_vd_vd_vd_vd(vcast_vd_d(-0.25), rint4x, x), + vtruncate_vi_vd(vmla_vd_vd_vd_vd(vcast_vd_d(-4), rintx, rint4x))); +#endif +} + +static INLINE CONST ddi_t rempi(vdouble a) { + vdouble2 x, y, z; + vint ex = vilogb2k_vi_vd(a); +#if defined(ENABLE_AVX512F) || defined(ENABLE_AVX512FNOFMA) + ex = vandnot_vi_vi_vi(vsra_vi_vi_i(ex, 31), ex); + ex = vand_vi_vi_vi(ex, vcast_vi_i(1023)); +#endif + ex = vsub_vi_vi_vi(ex, vcast_vi_i(55)); + vint q = vand_vi_vo_vi(vgt_vo_vi_vi(ex, vcast_vi_i(700-55)), vcast_vi_i(-64)); + a = vldexp3_vd_vd_vi(a, q); + ex = vandnot_vi_vi_vi(vsra_vi_vi_i(ex, 31), ex); + ex = vsll_vi_vi_i(ex, 2); + x = ddmul_vd2_vd_vd(a, vgather_vd_p_vi(Sleef_rempitabdp, ex)); + di_t di = rempisub(vd2getx_vd_vd2(x)); + q = digeti_vi_di(di); + x = vd2setx_vd2_vd2_vd(x, digetd_vd_di(di)); + x = 
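+/* rempi() is a Payne-Hanek style argument reduction: the exponent of a
+   indexes precomputed chunks of 2/pi in Sleef_rempitabdp (the vgather
+   calls), each product is accumulated in double-double so no significant
+   bits of a * 2/pi are lost, rempisub() peels the quadrant off into an
+   integer while keeping the fraction, and the fraction is rescaled by
+   2*pi at the end. */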
ddnormalize_vd2_vd2(x); + y = ddmul_vd2_vd_vd(a, vgather_vd_p_vi(Sleef_rempitabdp+1, ex)); + x = ddadd2_vd2_vd2_vd2(x, y); + di = rempisub(vd2getx_vd_vd2(x)); + q = vadd_vi_vi_vi(q, digeti_vi_di(di)); + x = vd2setx_vd2_vd2_vd(x, digetd_vd_di(di)); + x = ddnormalize_vd2_vd2(x); + y = vcast_vd2_vd_vd(vgather_vd_p_vi(Sleef_rempitabdp+2, ex), vgather_vd_p_vi(Sleef_rempitabdp+3, ex)); + y = ddmul_vd2_vd2_vd(y, a); + x = ddadd2_vd2_vd2_vd2(x, y); + x = ddnormalize_vd2_vd2(x); + x = ddmul_vd2_vd2_vd2(x, vcast_vd2_d_d(3.141592653589793116*2, 1.2246467991473532072e-16*2)); + vopmask o = vlt_vo_vd_vd(vabs_vd_vd(a), vcast_vd_d(0.7)); + x = vd2setx_vd2_vd2_vd(x, vsel_vd_vo_vd_vd(o, a, vd2getx_vd_vd2(x))); + x = vd2sety_vd2_vd2_vd(x, vreinterpret_vd_vm(vandnot_vm_vo64_vm(o, vreinterpret_vm_vd(vd2gety_vd_vd2(x))))); + return ddisetddi_ddi_vd2_vi(x, q); +} + +EXPORT CONST VECTOR_CC vdouble xsin(vdouble d) { +#if !defined(DETERMINISTIC) +// The SIMD source files(sleefsimd?p.c) are compiled twice for each +// vector extension, with DETERMINISTIC macro turned on and off. +// Below is the normal(faster) implementation of sin function. The +// function name xsin will be renamed to Sleef_sind2_u35sse2 with +// renamesse2.h, for example. + + vdouble u, s, r = d; + vint ql; + + if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { + vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI))); + ql = vrint_vi_vd(dql); + d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2), d); + d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2), d); + } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) { + vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI / (1 << 24)))); + dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); + vdouble dql = vrint_vd_vd(vmlapn_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), dqh)); + ql = vrint_vi_vd(dql); + + d = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A), d); + d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A), d); + d = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B), d); + d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B), d); + d = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C), d); + d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C), d); + d = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D), d); + } else { + ddi_t ddi = rempi(d); + ql = vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(3)); + ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0))), vcast_vi_i(2), vcast_vi_i(1))); + ql = vsra_vi_vi_i(ql, 2); + vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(1)), vcast_vi_i(1)); + vdouble2 x = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi))), + vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)))); + x = ddadd2_vd2_vd2_vd2(ddigetdd_vd2_ddi(ddi), x); + ddi = ddisetdd_ddi_ddi_vd2(ddi, vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), x, ddigetdd_vd2_ddi(ddi))); + d = vadd_vd_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vd2gety_vd_vd2(ddigetdd_vd2_ddi(ddi))); + d = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(r), visnan_vo_vd(r)), vreinterpret_vm_vd(d))); + } + + s = vmul_vd_vd_vd(d, d); + + d = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(d))); + + vdouble s2 = vmul_vd_vd_vd(s, s), s4 = 
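+/* s2 = s^2 and s4 = s^4 feed the POLY8 macro from estrin.h: Estrin's
+   scheme evaluates the degree-7 polynomial roughly as
+
+     (c0 + c1*s) + (c2 + c3*s)*s2 + ((c4 + c5*s) + (c6 + c7*s)*s2)*s4
+
+   so the multiply-adds are independent and can issue in parallel,
+   trading Horner's serial dependency chain for two extra squarings. */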
vmul_vd_vd_vd(s2, s2); + u = POLY8(s, s2, s4, + -7.97255955009037868891952e-18, + 2.81009972710863200091251e-15, + -7.64712219118158833288484e-13, + 1.60590430605664501629054e-10, + -2.50521083763502045810755e-08, + 2.75573192239198747630416e-06, + -0.000198412698412696162806809, + 0.00833333333333332974823815); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.166666666666666657414808)); + + u = vadd_vd_vd_vd(vmul_vd_vd_vd(s, vmul_vd_vd_vd(u, d)), d); + + u = vsel_vd_vo_vd_vd(visnegzero_vo_vd(r), r, u); + + return u; + +#else // #if !defined(DETERMINISTIC) + +// This is the deterministic implementation of sin function. Returned +// values from deterministic functions are bitwise consistent across +// all platforms. The function name xsin will be renamed to +// Sleef_cinz_sind2_u35sse2 with renamesse2.h, for example. The +// renaming by rename*.h is switched according to DETERMINISTIC macro. + vdouble u, s, r = d; + vint ql; + + vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI))); + ql = vrint_vi_vd(dql); + d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2), d); + d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2), d); + vopmask g = vlt_vo_vd_vd(vabs_vd_vd(r), vcast_vd_d(TRIGRANGEMAX2)); + + if (!LIKELY(vtestallones_i_vo64(g))) { + vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(r, vcast_vd_d(M_1_PI / (1 << 24)))); + dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); + vdouble dql = vrint_vd_vd(vmlapn_vd_vd_vd_vd(r, vcast_vd_d(M_1_PI), dqh)); + + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A), r); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A), u); + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B), u); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B), u); + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C), u); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C), u); + u = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D), u); + + ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, vrint_vi_vd(dql)); + d = vsel_vd_vo_vd_vd(g, d, u); + g = vlt_vo_vd_vd(vabs_vd_vd(r), vcast_vd_d(TRIGRANGEMAX)); + + if (!LIKELY(vtestallones_i_vo64(g))) { + ddi_t ddi = rempi(r); + vint ql2 = vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(3)); + ql2 = vadd_vi_vi_vi(vadd_vi_vi_vi(ql2, ql2), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0))), vcast_vi_i(2), vcast_vi_i(1))); + ql2 = vsra_vi_vi_i(ql2, 2); + vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(1)), vcast_vi_i(1)); + vdouble2 x = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi))), + vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)))); + x = ddadd2_vd2_vd2_vd2(ddigetdd_vd2_ddi(ddi), x); + ddi = ddisetdd_ddi_ddi_vd2(ddi, vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), x, ddigetdd_vd2_ddi(ddi))); + u = vadd_vd_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vd2gety_vd_vd2(ddigetdd_vd2_ddi(ddi))); + ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ql2); + d = vsel_vd_vo_vd_vd(g, d, u); + d = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(r), visnan_vo_vd(r)), vreinterpret_vm_vd(d))); + } + } + + s = vmul_vd_vd_vd(d, d); + + d = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(d))); + + vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2); + u = POLY8(s, s2, s4, + -7.97255955009037868891952e-18, + 2.81009972710863200091251e-15, + -7.64712219118158833288484e-13, + 
1.60590430605664501629054e-10, + -2.50521083763502045810755e-08, + 2.75573192239198747630416e-06, + -0.000198412698412696162806809, + 0.00833333333333332974823815); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.166666666666666657414808)); + + u = vadd_vd_vd_vd(vmul_vd_vd_vd(s, vmul_vd_vd_vd(u, d)), d); + + u = vsel_vd_vo_vd_vd(visnegzero_vo_vd(r), r, u); + + return u; +#endif // #if !defined(DETERMINISTIC) +} + +EXPORT CONST VECTOR_CC vdouble xsin_u1(vdouble d) { +#if !defined(DETERMINISTIC) + vdouble u; + vdouble2 s, t, x; + vint ql; + + if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { + const vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI))); + ql = vrint_vi_vd(dql); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2), d); + s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2))); + } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) { + vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI / (1 << 24)))); + dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); + const vdouble dql = vrint_vd_vd(vmlapn_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), dqh)); + ql = vrint_vi_vd(dql); + + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A), d); + s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C))); + s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D))); + } else { + ddi_t ddi = rempi(d); + ql = vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(3)); + ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0))), vcast_vi_i(2), vcast_vi_i(1))); + ql = vsra_vi_vi_i(ql, 2); + vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(1)), vcast_vi_i(1)); + vdouble2 x = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi))), + vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)))); + x = ddadd2_vd2_vd2_vd2(ddigetdd_vd2_ddi(ddi), x); + ddi = ddisetdd_ddi_ddi_vd2(ddi, vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), x, ddigetdd_vd2_ddi(ddi))); + s = ddnormalize_vd2_vd2(ddigetdd_vd2_ddi(ddi)); + s = vd2setx_vd2_vd2_vd(s, vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(vd2getx_vd_vd2(s))))); + } + + t = s; + s = ddsqu_vd2_vd2(s); + + vdouble s2 = vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(s)), s4 = vmul_vd_vd_vd(s2, s2); + u = POLY6(vd2getx_vd_vd2(s), s2, s4, + 2.72052416138529567917983e-15, + -7.6429259411395447190023e-13, + 1.60589370117277896211623e-10, + -2.5052106814843123359368e-08, + 2.75573192104428224777379e-06, + -0.000198412698412046454654947); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(0.00833333333333318056201922)); + + x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd2_vd2(ddadd_vd2_vd_vd(vcast_vd_d(-0.166666666666666657414808), vmul_vd_vd_vd(u, vd2getx_vd_vd2(s))), s)); + u = ddmul_vd_vd2_vd2(t, x); + + u = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))), + vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(u))); + u = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, 
vcast_vd_d(0)), d, u); + + return u; + +#else // #if !defined(DETERMINISTIC) + + vdouble u; + vdouble2 s, t, x; + vint ql; + + vopmask g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2)); + vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI))); + ql = vrint_vi_vd(dql); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2), d); + x = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2))); + + if (!LIKELY(vtestallones_i_vo64(g))) { + vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI / (1 << 24)))); + dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); + const vdouble dql = vrint_vd_vd(vmlapn_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), dqh)); + + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A), d); + s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C))); + s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D))); + + ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, vrint_vi_vd(dql)); + x = vsel_vd2_vo_vd2_vd2(g, x, s); + g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX)); + + if (!LIKELY(vtestallones_i_vo64(g))) { + ddi_t ddi = rempi(d); + vint ql2 = vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(3)); + ql2 = vadd_vi_vi_vi(vadd_vi_vi_vi(ql2, ql2), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0))), vcast_vi_i(2), vcast_vi_i(1))); + ql2 = vsra_vi_vi_i(ql2, 2); + vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(1)), vcast_vi_i(1)); + vdouble2 t = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi))), + vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)))); + t = ddadd2_vd2_vd2_vd2(ddigetdd_vd2_ddi(ddi), t); + ddi = ddisetdd_ddi_ddi_vd2(ddi, vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), t, ddigetdd_vd2_ddi(ddi))); + s = ddnormalize_vd2_vd2(ddigetdd_vd2_ddi(ddi)); + ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ql2); + x = vsel_vd2_vo_vd2_vd2(g, x, s); + x = vd2setx_vd2_vd2_vd(x, vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(vd2getx_vd_vd2(x))))); + } + } + + t = x; + s = ddsqu_vd2_vd2(x); + + vdouble s2 = vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(s)), s4 = vmul_vd_vd_vd(s2, s2); + u = POLY6(vd2getx_vd_vd2(s), s2, s4, + 2.72052416138529567917983e-15, + -7.6429259411395447190023e-13, + 1.60589370117277896211623e-10, + -2.5052106814843123359368e-08, + 2.75573192104428224777379e-06, + -0.000198412698412046454654947); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(0.00833333333333318056201922)); + + x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd2_vd2(ddadd_vd2_vd_vd(vcast_vd_d(-0.166666666666666657414808), vmul_vd_vd_vd(u, vd2getx_vd_vd2(s))), s)); + u = ddmul_vd_vd2_vd2(t, x); + + u = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))), + vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(u))); + + u = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), d, u); + + return u; +#endif // #if !defined(DETERMINISTIC) +} + +EXPORT CONST VECTOR_CC vdouble xcos(vdouble d) { +#if !defined(DETERMINISTIC) + vdouble u, s, r = d; + vint ql; + + if 
(LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { + vdouble dql = vmla_vd_vd_vd_vd(vcast_vd_d(2), + vrint_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), vcast_vd_d(-0.5))), + vcast_vd_d(1)); + ql = vrint_vi_vd(dql); + d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2 * 0.5), d); + d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2 * 0.5), d); + } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) { + vdouble dqh = vtruncate_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI / (1 << 23)), vcast_vd_d(-M_1_PI / (1 << 24)))); + ql = vrint_vi_vd(vadd_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI)), + vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-(1 << 23)), vcast_vd_d(-0.5)))); + dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); + ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vcast_vi_i(1)); + vdouble dql = vcast_vd_vi(ql); + + d = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); + d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A * 0.5), d); + d = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B * 0.5), d); + d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B * 0.5), d); + d = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C * 0.5), d); + d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C * 0.5), d); + d = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D * 0.5), d); + } else { + ddi_t ddi = rempi(d); + ql = vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(3)); + ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0))), vcast_vi_i(8), vcast_vi_i(7))); + ql = vsra_vi_vi_i(ql, 1); + vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(1)), vcast_vi_i(0)); + vdouble y = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0)), vcast_vd_d(0), vcast_vd_d(-1)); + vdouble2 x = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), y), + vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), y)); + x = ddadd2_vd2_vd2_vd2(ddigetdd_vd2_ddi(ddi), x); + ddi = ddisetdd_ddi_ddi_vd2(ddi, vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), x, ddigetdd_vd2_ddi(ddi))); + d = vadd_vd_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vd2gety_vd_vd2(ddigetdd_vd2_ddi(ddi))); + d = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(r), visnan_vo_vd(r)), vreinterpret_vm_vd(d))); + } + + s = vmul_vd_vd_vd(d, d); + + d = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(0))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(d))); + + vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2); + u = POLY8(s, s2, s4, + -7.97255955009037868891952e-18, + 2.81009972710863200091251e-15, + -7.64712219118158833288484e-13, + 1.60590430605664501629054e-10, + -2.50521083763502045810755e-08, + 2.75573192239198747630416e-06, + -0.000198412698412696162806809, + 0.00833333333333332974823815); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.166666666666666657414808)); + + u = vadd_vd_vd_vd(vmul_vd_vd_vd(s, vmul_vd_vd_vd(u, d)), d); + + return u; + +#else // #if !defined(DETERMINISTIC) + + vdouble u, s, r = d; + vint ql; + + vopmask g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2)); + vdouble dql = vmla_vd_vd_vd_vd(vcast_vd_d(2), + vrint_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), vcast_vd_d(-0.5))), + vcast_vd_d(1)); + ql = vrint_vi_vd(dql); + d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2 * 0.5), d); + d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2 * 0.5), d); + + if 
(!LIKELY(vtestallones_i_vo64(g))) { + vdouble dqh = vtruncate_vd_vd(vmla_vd_vd_vd_vd(r, vcast_vd_d(M_1_PI / (1 << 23)), vcast_vd_d(-M_1_PI / (1 << 24)))); + vint ql2 = vrint_vi_vd(vadd_vd_vd_vd(vmul_vd_vd_vd(r, vcast_vd_d(M_1_PI)), + vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-(1 << 23)), vcast_vd_d(-0.5)))); + dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); + ql2 = vadd_vi_vi_vi(vadd_vi_vi_vi(ql2, ql2), vcast_vi_i(1)); + vdouble dql = vcast_vd_vi(ql2); + + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), r); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A * 0.5), u); + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B * 0.5), u); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B * 0.5), u); + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C * 0.5), u); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C * 0.5), u); + u = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D * 0.5), u); + + ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ql2); + d = vsel_vd_vo_vd_vd(g, d, u); + g = vlt_vo_vd_vd(vabs_vd_vd(r), vcast_vd_d(TRIGRANGEMAX)); + + if (!LIKELY(vtestallones_i_vo64(g))) { + ddi_t ddi = rempi(r); + vint ql2 = vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(3)); + ql2 = vadd_vi_vi_vi(vadd_vi_vi_vi(ql2, ql2), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0))), vcast_vi_i(8), vcast_vi_i(7))); + ql2 = vsra_vi_vi_i(ql2, 1); + vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(1)), vcast_vi_i(0)); + vdouble y = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0)), vcast_vd_d(0), vcast_vd_d(-1)); + vdouble2 x = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), y), + vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), y)); + x = ddadd2_vd2_vd2_vd2(ddigetdd_vd2_ddi(ddi), x); + ddi = ddisetdd_ddi_ddi_vd2(ddi, vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), x, ddigetdd_vd2_ddi(ddi))); + u = vadd_vd_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vd2gety_vd_vd2(ddigetdd_vd2_ddi(ddi))); + ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ql2); + d = vsel_vd_vo_vd_vd(g, d, u); + d = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(r), visnan_vo_vd(r)), vreinterpret_vm_vd(d))); + } + } + + s = vmul_vd_vd_vd(d, d); + + d = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(0))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(d))); + + vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2); + u = POLY8(s, s2, s4, + -7.97255955009037868891952e-18, + 2.81009972710863200091251e-15, + -7.64712219118158833288484e-13, + 1.60590430605664501629054e-10, + -2.50521083763502045810755e-08, + 2.75573192239198747630416e-06, + -0.000198412698412696162806809, + 0.00833333333333332974823815); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.166666666666666657414808)); + + u = vadd_vd_vd_vd(vmul_vd_vd_vd(s, vmul_vd_vd_vd(u, d)), d); + + return u; +#endif // #if !defined(DETERMINISTIC) +} + +EXPORT CONST VECTOR_CC vdouble xcos_u1(vdouble d) { +#if !defined(DETERMINISTIC) + vdouble u; + vdouble2 s, t, x; + vint ql; + + if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { + vdouble dql = vrint_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), vcast_vd_d(-0.5))); + dql = vmla_vd_vd_vd_vd(vcast_vd_d(2), dql, vcast_vd_d(1)); + ql = vrint_vi_vd(dql); + s = ddadd2_vd2_vd_vd(d, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A2*0.5))); + s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2*0.5))); + } else if 
(LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) { + vdouble dqh = vtruncate_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI / (1 << 23)), vcast_vd_d(-M_1_PI / (1 << 24)))); + ql = vrint_vi_vd(vadd_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI)), + vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-(1 << 23)), vcast_vd_d(-0.5)))); + dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); + ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vcast_vi_i(1)); + const vdouble dql = vcast_vd_vi(ql); + + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); + s = ddadd2_vd2_vd_vd(u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A*0.5))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B*0.5))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B*0.5))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C*0.5))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C*0.5))); + s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D*0.5))); + } else { + ddi_t ddi = rempi(d); + ql = vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(3)); + ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0))), vcast_vi_i(8), vcast_vi_i(7))); + ql = vsra_vi_vi_i(ql, 1); + vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(1)), vcast_vi_i(0)); + vdouble y = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0)), vcast_vd_d(0), vcast_vd_d(-1)); + vdouble2 x = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), y), + vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), y)); + x = ddadd2_vd2_vd2_vd2(ddigetdd_vd2_ddi(ddi), x); + ddi = ddisetdd_ddi_ddi_vd2(ddi, vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), x, ddigetdd_vd2_ddi(ddi))); + s = ddnormalize_vd2_vd2(ddigetdd_vd2_ddi(ddi)); + s = vd2setx_vd2_vd2_vd(s, vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(vd2getx_vd_vd2(s))))); + } + + t = s; + s = ddsqu_vd2_vd2(s); + + vdouble s2 = vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(s)), s4 = vmul_vd_vd_vd(s2, s2); + u = POLY6(vd2getx_vd_vd2(s), s2, s4, + 2.72052416138529567917983e-15, + -7.6429259411395447190023e-13, + 1.60589370117277896211623e-10, + -2.5052106814843123359368e-08, + 2.75573192104428224777379e-06, + -0.000198412698412046454654947); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(0.00833333333333318056201922)); + + x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd2_vd2(ddadd_vd2_vd_vd(vcast_vd_d(-0.166666666666666657414808), vmul_vd_vd_vd(u, vd2getx_vd_vd2(s))), s)); + u = ddmul_vd_vd2_vd2(t, x); + + u = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(0))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(u))); + + return u; + +#else // #if !defined(DETERMINISTIC) + + vdouble u; + vdouble2 s, t, x; + vint ql; + + vopmask g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2)); + vdouble dql = vrint_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), vcast_vd_d(-0.5))); + dql = vmla_vd_vd_vd_vd(vcast_vd_d(2), dql, vcast_vd_d(1)); + ql = vrint_vi_vd(dql); + x = ddadd2_vd2_vd_vd(d, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A2*0.5))); + x = ddadd_vd2_vd2_vd(x, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2*0.5))); + + if (!LIKELY(vtestallones_i_vo64(g))) { + vdouble dqh = vtruncate_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI / (1 << 23)), vcast_vd_d(-M_1_PI / (1 << 24)))); + vint 
ql2 = vrint_vi_vd(vadd_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI)), + vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-(1 << 23)), vcast_vd_d(-0.5)))); + dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); + ql2 = vadd_vi_vi_vi(vadd_vi_vi_vi(ql2, ql2), vcast_vi_i(1)); + const vdouble dql = vcast_vd_vi(ql2); + + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); + s = ddadd2_vd2_vd_vd(u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A*0.5))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B*0.5))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B*0.5))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C*0.5))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C*0.5))); + s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D*0.5))); + + ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ql2); + x = vsel_vd2_vo_vd2_vd2(g, x, s); + g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX)); + + if (!LIKELY(vtestallones_i_vo64(g))) { + ddi_t ddi = rempi(d); + vint ql2 = vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(3)); + ql2 = vadd_vi_vi_vi(vadd_vi_vi_vi(ql2, ql2), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0))), vcast_vi_i(8), vcast_vi_i(7))); + ql2 = vsra_vi_vi_i(ql2, 1); + vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(1)), vcast_vi_i(0)); + vdouble y = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0)), vcast_vd_d(0), vcast_vd_d(-1)); + vdouble2 t = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), y), + vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), y)); + t = ddadd2_vd2_vd2_vd2(ddigetdd_vd2_ddi(ddi), t); + ddi = ddisetdd_ddi_ddi_vd2(ddi, vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), t, ddigetdd_vd2_ddi(ddi))); + s = ddnormalize_vd2_vd2(ddigetdd_vd2_ddi(ddi)); + ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ql2); + x = vsel_vd2_vo_vd2_vd2(g, x, s); + x = vd2setx_vd2_vd2_vd(x, vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(vd2getx_vd_vd2(x))))); + } + } + + t = x; + s = ddsqu_vd2_vd2(x); + + vdouble s2 = vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(s)), s4 = vmul_vd_vd_vd(s2, s2); + u = POLY6(vd2getx_vd_vd2(s), s2, s4, + 2.72052416138529567917983e-15, + -7.6429259411395447190023e-13, + 1.60589370117277896211623e-10, + -2.5052106814843123359368e-08, + 2.75573192104428224777379e-06, + -0.000198412698412046454654947); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(0.00833333333333318056201922)); + + x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd2_vd2(ddadd_vd2_vd_vd(vcast_vd_d(-0.166666666666666657414808), vmul_vd_vd_vd(u, vd2getx_vd_vd2(s))), s)); + u = ddmul_vd_vd2_vd2(t, x); + + u = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(0))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(u))); + + return u; +#endif // #if !defined(DETERMINISTIC) +} + +#ifdef ENABLE_GNUABI +#define TYPE2_FUNCATR static INLINE CONST +#define TYPE6_FUNCATR static INLINE CONST +#define SQRTU05_FUNCATR static INLINE CONST +#define XSINCOS sincosk +#define XSINCOS_U1 sincosk_u1 +#define XSINCOSPI_U05 sincospik_u05 +#define XSINCOSPI_U35 sincospik_u35 +#define XMODF modfk +#else +#define TYPE2_FUNCATR EXPORT +#define TYPE6_FUNCATR EXPORT CONST +#define SQRTU05_FUNCATR EXPORT CONST +#define XSINCOS xsincos +#define XSINCOS_U1 xsincos_u1 +#define XSINCOSPI_U05 xsincospi_u05 
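+// Under ENABLE_GNUABI the tuple-returning kernels are compiled as
+// static inline helpers (sincosk, sincosk_u1, modfk, ...) and wrapped
+// elsewhere, because the GNU vector ABI returns sine and cosine through
+// pointer arguments rather than as a vdouble2; in the normal build the
+// x* names are exported directly.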
+#define XSINCOSPI_U35 xsincospi_u35 +#define XMODF xmodf +#endif + +TYPE2_FUNCATR VECTOR_CC vdouble2 XSINCOS(vdouble d) { +#if !defined(DETERMINISTIC) + vopmask o; + vdouble u, t, rx, ry, s; + vdouble2 r; + vint ql; + + if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { + vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI))); + ql = vrint_vi_vd(dql); + s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2 * 0.5), d); + s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2 * 0.5), s); + } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) { + vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24)))); + dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); + vdouble dql = vrint_vd_vd(vsub_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI)), dqh)); + ql = vrint_vi_vd(dql); + + s = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); + s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A * 0.5), s); + s = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B * 0.5), s); + s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B * 0.5), s); + s = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C * 0.5), s); + s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C * 0.5), s); + s = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D * 0.5), s); + } else { + ddi_t ddi = rempi(d); + ql = ddigeti_vi_ddi(ddi); + s = vadd_vd_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vd2gety_vd_vd2(ddigetdd_vd2_ddi(ddi))); + s = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(s))); + } + + t = s; + + s = vmul_vd_vd_vd(s, s); + + u = vcast_vd_d(1.58938307283228937328511e-10); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.50506943502539773349318e-08)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75573131776846360512547e-06)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.000198412698278911770864914)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0083333333333191845961746)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.166666666666666130709393)); + + rx = vmla_vd_vd_vd_vd(vmul_vd_vd_vd(u, s), t, t); + rx = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), rx); + + u = vcast_vd_d(-1.13615350239097429531523e-11); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.08757471207040055479366e-09)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.75573144028847567498567e-07)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.48015872890001867311915e-05)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.00138888888888714019282329)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0416666666666665519592062)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.5)); + + ry = vmla_vd_vd_vd_vd(s, u, vcast_vd_d(1)); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(0))); + r = vd2setxy_vd2_vd_vd(vsel_vd_vo_vd_vd(o, rx, ry), vsel_vd_vo_vd_vd(o, ry, rx)); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(2))); + r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2getx_vd_vd2(r))))); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(2)), vcast_vi_i(2))); + r = vd2sety_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2gety_vd_vd2(r))))); + + return r; + +#else // #if !defined(DETERMINISTIC) + + vopmask o; + vdouble u, t, rx, ry, s = d; + vdouble2 r; + vint ql; + + vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(s, vcast_vd_d(2 * 
M_1_PI))); + ql = vrint_vi_vd(dql); + s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2 * 0.5), s); + s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2 * 0.5), s); + vopmask g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2)); + + if (!LIKELY(vtestallones_i_vo64(g))) { + vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24)))); + dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); + vdouble dql = vrint_vd_vd(vsub_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI)), dqh)); + + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A * 0.5), u); + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B * 0.5), u); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B * 0.5), u); + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C * 0.5), u); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C * 0.5), u); + u = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D * 0.5), u); + + ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, vrint_vi_vd(dql)); + s = vsel_vd_vo_vd_vd(g, s, u); + g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX)); + + if (!LIKELY(vtestallones_i_vo64(g))) { + ddi_t ddi = rempi(d); + u = vadd_vd_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vd2gety_vd_vd2(ddigetdd_vd2_ddi(ddi))); + u = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(u))); + + ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ddigeti_vi_ddi(ddi)); + s = vsel_vd_vo_vd_vd(g, s, u); + } + } + + t = s; + + s = vmul_vd_vd_vd(s, s); + + u = vcast_vd_d(1.58938307283228937328511e-10); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.50506943502539773349318e-08)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75573131776846360512547e-06)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.000198412698278911770864914)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0083333333333191845961746)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.166666666666666130709393)); + + rx = vmla_vd_vd_vd_vd(vmul_vd_vd_vd(u, s), t, t); + rx = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), rx); + + u = vcast_vd_d(-1.13615350239097429531523e-11); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.08757471207040055479366e-09)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.75573144028847567498567e-07)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.48015872890001867311915e-05)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.00138888888888714019282329)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0416666666666665519592062)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.5)); + + ry = vmla_vd_vd_vd_vd(s, u, vcast_vd_d(1)); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(0))); + r = vd2setxy_vd2_vd_vd(vsel_vd_vo_vd_vd(o, rx, ry), vsel_vd_vo_vd_vd(o, ry, rx)); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(2))); + r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2getx_vd_vd2(r))))); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(2)), vcast_vi_i(2))); + r = vd2sety_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2gety_vd_vd2(r))))); + + return r; +#endif // #if !defined(DETERMINISTIC) +} + +TYPE2_FUNCATR VECTOR_CC vdouble2 XSINCOS_U1(vdouble d) { +#if !defined(DETERMINISTIC) + vopmask o; + vdouble u, rx, ry; + vdouble2 r, s, t, x; + vint ql; + + if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), 
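+  /* The _u1 variants repeat the same reduction tiers but keep the
+     reduced argument as a double-double (the ddadd/ddadd2 chains), and
+     the final polynomial is corrected in double-double arithmetic;
+     that is what tightens the error bound to about 1 ulp versus the
+     ~3.5 ulp fast versions above. */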
vcast_vd_d(TRIGRANGEMAX2))))) { + const vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI))); + ql = vrint_vi_vd(dql); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2*0.5), d); + s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2*0.5))); + } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) { + vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24)))); + dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); + const vdouble dql = vrint_vd_vd(vsub_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI)), dqh)); + ql = vrint_vi_vd(dql); + + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); + s = ddadd_vd2_vd_vd(u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A*0.5))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B*0.5))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B*0.5))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C*0.5))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C*0.5))); + s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D*0.5))); + } else { + ddi_t ddi = rempi(d); + ql = ddigeti_vi_ddi(ddi); + s = ddigetdd_vd2_ddi(ddi); + o = vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)); + s = vd2setxy_vd2_vd_vd(vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2getx_vd_vd2(s)))), + vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2gety_vd_vd2(s))))); + } + + t = s; + + s = vd2setx_vd2_vd2_vd(s, ddsqu_vd_vd2(s)); + + u = vcast_vd_d(1.58938307283228937328511e-10); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-2.50506943502539773349318e-08)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(2.75573131776846360512547e-06)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-0.000198412698278911770864914)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(0.0083333333333191845961746)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-0.166666666666666130709393)); + + u = vmul_vd_vd_vd(u, vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(t))); + + x = ddadd_vd2_vd2_vd(t, u); + rx = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); + + rx = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), rx); + + u = vcast_vd_d(-1.13615350239097429531523e-11); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(2.08757471207040055479366e-09)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-2.75573144028847567498567e-07)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(2.48015872890001867311915e-05)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-0.00138888888888714019282329)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(0.0416666666666665519592062)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-0.5)); + + x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd_vd(vd2getx_vd_vd2(s), u)); + ry = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(0))); + r = vd2setxy_vd2_vd_vd(vsel_vd_vo_vd_vd(o, rx, ry), vsel_vd_vo_vd_vd(o, ry, rx)); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(2))); + r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2getx_vd_vd2(r))))); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(2)), vcast_vi_i(2))); + r = vd2sety_vd2_vd2_vd(r, 
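+  /* Quadrant fixup, shared by all the sincos variants: with
+     q = rint(2d/pi), bit 0 of q swaps the sine and cosine slots, bit 1
+     of q flips the sign of the sine slot, and bit 1 of (q + 1) flips
+     the sign of the cosine slot; the sign flips are XORs with -0.0, so
+     they are exact. */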
vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2gety_vd_vd2(r))))); + + return r; + +#else // #if !defined(DETERMINISTIC) + + vopmask o; + vdouble u, rx, ry; + vdouble2 r, s, t, x; + vint ql; + + const vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI))); + ql = vrint_vi_vd(dql); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2*0.5), d); + s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2*0.5))); + vopmask g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2)); + + if (!LIKELY(vtestallones_i_vo64(g))) { + vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24)))); + dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); + const vdouble dql = vrint_vd_vd(vsub_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI)), dqh)); + + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); + x = ddadd_vd2_vd_vd(u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A*0.5))); + x = ddadd2_vd2_vd2_vd(x, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B*0.5))); + x = ddadd2_vd2_vd2_vd(x, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B*0.5))); + x = ddadd2_vd2_vd2_vd(x, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C*0.5))); + x = ddadd2_vd2_vd2_vd(x, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C*0.5))); + x = ddadd_vd2_vd2_vd(x, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D*0.5))); + + ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, vrint_vi_vd(dql)); + s = vsel_vd2_vo_vd2_vd2(g, s, x); + g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX)); + + if (!LIKELY(vtestallones_i_vo64(g))) { + ddi_t ddi = rempi(d); + x = ddigetdd_vd2_ddi(ddi); + o = vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)); + x = vd2setx_vd2_vd2_vd(x, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2getx_vd_vd2(x))))); + x = vd2sety_vd2_vd2_vd(x, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2gety_vd_vd2(x))))); + + ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ddigeti_vi_ddi(ddi)); + s = vsel_vd2_vo_vd2_vd2(g, s, x); + } + } + + t = s; + + s = vd2setx_vd2_vd2_vd(s, ddsqu_vd_vd2(s)); + + u = vcast_vd_d(1.58938307283228937328511e-10); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-2.50506943502539773349318e-08)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(2.75573131776846360512547e-06)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-0.000198412698278911770864914)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(0.0083333333333191845961746)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-0.166666666666666130709393)); + + u = vmul_vd_vd_vd(u, vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(t))); + + x = ddadd_vd2_vd2_vd(t, u); + rx = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); + + rx = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), rx); + + u = vcast_vd_d(-1.13615350239097429531523e-11); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(2.08757471207040055479366e-09)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-2.75573144028847567498567e-07)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(2.48015872890001867311915e-05)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-0.00138888888888714019282329)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(0.0416666666666665519592062)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-0.5)); + + x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd_vd(vd2getx_vd_vd2(s), u)); + ry = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), 
vcast_vi_i(0))); + r = vd2setxy_vd2_vd_vd(vsel_vd_vo_vd_vd(o, rx, ry), vsel_vd_vo_vd_vd(o, ry, rx)); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(2))); + r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2getx_vd_vd2(r))))); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(2)), vcast_vi_i(2))); + r = vd2sety_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2gety_vd_vd2(r))))); + + return r; +#endif // #if !defined(DETERMINISTIC) +} + +#if !defined(DETERMINISTIC) +TYPE2_FUNCATR VECTOR_CC vdouble2 XSINCOSPI_U05(vdouble d) { + vopmask o; + vdouble u, s, t, rx, ry; + vdouble2 r, x, s2; + + u = vmul_vd_vd_vd(d, vcast_vd_d(4.0)); + vint q = vtruncate_vi_vd(u); + q = vand_vi_vi_vi(vadd_vi_vi_vi(q, vxor_vi_vi_vi(vsrl_vi_vi_i(q, 31), vcast_vi_i(1))), vcast_vi_i(~1)); + s = vsub_vd_vd_vd(u, vcast_vd_vi(q)); + + t = s; + s = vmul_vd_vd_vd(s, s); + s2 = ddmul_vd2_vd_vd(t, t); + + // + + u = vcast_vd_d(-2.02461120785182399295868e-14); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(6.94821830580179461327784e-12)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-1.75724749952853179952664e-09)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(3.13361688966868392878422e-07)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-3.6576204182161551920361e-05)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00249039457019271850274356)); + x = ddadd2_vd2_vd_vd2(vmul_vd_vd_vd(u, s), vcast_vd2_d_d(-0.0807455121882807852484731, 3.61852475067037104849987e-18)); + x = ddadd2_vd2_vd2_vd2(ddmul_vd2_vd2_vd2(s2, x), vcast_vd2_d_d(0.785398163397448278999491, 3.06287113727155002607105e-17)); + + x = ddmul_vd2_vd2_vd(x, t); + rx = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); + + rx = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), rx); + + // + + u = vcast_vd_d(9.94480387626843774090208e-16); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-3.89796226062932799164047e-13)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(1.15011582539996035266901e-10)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.4611369501044697495359e-08)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(3.59086044859052754005062e-06)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.000325991886927389905997954)); + x = ddadd2_vd2_vd_vd2(vmul_vd_vd_vd(u, s), vcast_vd2_d_d(0.0158543442438155018914259, -1.04693272280631521908845e-18)); + x = ddadd2_vd2_vd2_vd2(ddmul_vd2_vd2_vd2(s2, x), vcast_vd2_d_d(-0.308425137534042437259529, -1.95698492133633550338345e-17)); + + x = ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd2(x, s2), vcast_vd_d(1)); + ry = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); + + // + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(0))); + r = vd2setxy_vd2_vd_vd(vsel_vd_vo_vd_vd(o, rx, ry), vsel_vd_vo_vd_vd(o, ry, rx)); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(4)), vcast_vi_i(4))); + r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2getx_vd_vd2(r))))); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(4)), vcast_vi_i(4))); + r = vd2sety_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2gety_vd_vd2(r))))); + + o = vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX3/4)); + r = 
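+  /* sincospi special cases: once |d| exceeds TRIGRANGEMAX3/4 the
+     fractional part of 4d is no longer representable, so d is treated
+     as an even integer: the sine slot is flushed to 0 and the cosine
+     slot to 1; an infinite input then turns both slots into NaN by
+     OR-ing all mask bits into the result. */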
vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vandnot_vm_vo64_vm(o, vreinterpret_vm_vd(vd2getx_vd_vd2(r))))); + r = vd2sety_vd2_vd2_vd(r, vsel_vd_vo_vd_vd(o, vcast_vd_d(1), vd2gety_vd_vd2(r))); + + o = visinf_vo_vd(d); + r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2getx_vd_vd2(r))))); + r = vd2sety_vd2_vd2_vd(r, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2gety_vd_vd2(r))))); + + return r; +} + +TYPE2_FUNCATR VECTOR_CC vdouble2 XSINCOSPI_U35(vdouble d) { + vopmask o; + vdouble u, s, t, rx, ry; + vdouble2 r; + + u = vmul_vd_vd_vd(d, vcast_vd_d(4.0)); + vint q = vtruncate_vi_vd(u); + q = vand_vi_vi_vi(vadd_vi_vi_vi(q, vxor_vi_vi_vi(vsrl_vi_vi_i(q, 31), vcast_vi_i(1))), vcast_vi_i(~1)); + s = vsub_vd_vd_vd(u, vcast_vd_vi(q)); + + t = s; + s = vmul_vd_vd_vd(s, s); + + // + + u = vcast_vd_d(+0.6880638894766060136e-11); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.1757159564542310199e-8)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.3133616327257867311e-6)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.3657620416388486452e-4)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2490394570189932103e-2)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.8074551218828056320e-1)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.7853981633974482790e+0)); + + rx = vmul_vd_vd_vd(u, t); + + // + + u = vcast_vd_d(-0.3860141213683794352e-12); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1150057888029681415e-9)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.2461136493006663553e-7)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.3590860446623516713e-5)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.3259918869269435942e-3)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1585434424381541169e-1)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.3084251375340424373e+0)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(1)); + + ry = u; + + // + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(0))); + r = vd2setxy_vd2_vd_vd(vsel_vd_vo_vd_vd(o, rx, ry), vsel_vd_vo_vd_vd(o, ry, rx)); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(4)), vcast_vi_i(4))); + r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2getx_vd_vd2(r))))); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(4)), vcast_vi_i(4))); + r = vd2sety_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2gety_vd_vd2(r))))); + + o = vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX3/4)); + r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vandnot_vm_vo64_vm(o, vreinterpret_vm_vd(vd2getx_vd_vd2(r))))); + r = vd2sety_vd2_vd2_vd(r, vreinterpret_vd_vm(vandnot_vm_vo64_vm(o, vreinterpret_vm_vd(vd2gety_vd_vd2(r))))); + + o = visinf_vo_vd(d); + r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2getx_vd_vd2(r))))); + r = vd2sety_vd2_vd2_vd(r, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2gety_vd_vd2(r))))); + + return r; +} + +TYPE6_FUNCATR VECTOR_CC vdouble2 XMODF(vdouble x) { + vdouble fr = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vcast_vd_d(INT64_C(1) << 31), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (INT64_C(1) << 31))))))); + fr = vsub_vd_vd_vd(fr, vcast_vd_vi(vtruncate_vi_vd(fr))); + fr = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(INT64_C(1) << 52)), vcast_vd_d(0), fr); + + vdouble2 ret; + + ret = vd2setxy_vd2_vd_vd(vcopysign_vd_vd_vd(fr, x), 
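+  /* modf splits x in two truncation steps because vint only holds 32
+     bits: first remove multiples of 2^31, then truncate the remainder;
+     for |x| > 2^52 every double is already an integer, so the fraction
+     is forced to 0, and both halves take the sign of x (so modf(-0.0)
+     keeps -0.0).  Scalar sketch (names illustrative only):
+       double hi = trunc(x / 2147483648.0) * 2147483648.0;
+       double fr = (x - hi) - trunc(x - hi);
+  */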
vcopysign_vd_vd_vd(vsub_vd_vd_vd(x, fr), x)); + + return ret; +} + +#ifdef ENABLE_GNUABI +EXPORT VECTOR_CC void xsincos(vdouble a, double *ps, double *pc) { + vdouble2 r = sincosk(a); + vstoreu_v_p_vd(ps, vd2getx_vd_vd2(r)); + vstoreu_v_p_vd(pc, vd2gety_vd_vd2(r)); +} + +EXPORT VECTOR_CC void xsincos_u1(vdouble a, double *ps, double *pc) { + vdouble2 r = sincosk_u1(a); + vstoreu_v_p_vd(ps, vd2getx_vd_vd2(r)); + vstoreu_v_p_vd(pc, vd2gety_vd_vd2(r)); +} + +EXPORT VECTOR_CC void xsincospi_u05(vdouble a, double *ps, double *pc) { + vdouble2 r = sincospik_u05(a); + vstoreu_v_p_vd(ps, vd2getx_vd_vd2(r)); + vstoreu_v_p_vd(pc, vd2gety_vd_vd2(r)); +} + +EXPORT VECTOR_CC void xsincospi_u35(vdouble a, double *ps, double *pc) { + vdouble2 r = sincospik_u35(a); + vstoreu_v_p_vd(ps, vd2getx_vd_vd2(r)); + vstoreu_v_p_vd(pc, vd2gety_vd_vd2(r)); +} + +EXPORT CONST VECTOR_CC vdouble xmodf(vdouble a, double *iptr) { + vdouble2 r = modfk(a); + vstoreu_v_p_vd(iptr, vd2gety_vd_vd2(r)); + return vd2getx_vd_vd2(r); +} +#endif // #ifdef ENABLE_GNUABI +#endif // #if !defined(DETERMINISTIC) + +static INLINE CONST VECTOR_CC vdouble2 sinpik(vdouble d) { + vopmask o; + vdouble u, s, t; + vdouble2 x, s2; + + u = vmul_vd_vd_vd(d, vcast_vd_d(4.0)); + vint q = vtruncate_vi_vd(u); + q = vand_vi_vi_vi(vadd_vi_vi_vi(q, vxor_vi_vi_vi(vsrl_vi_vi_i(q, 31), vcast_vi_i(1))), vcast_vi_i(~1)); + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(2))); + + s = vsub_vd_vd_vd(u, vcast_vd_vi(q)); + t = s; + s = vmul_vd_vd_vd(s, s); + s2 = ddmul_vd2_vd_vd(t, t); + + // + + u = vsel_vd_vo_d_d(o, 9.94480387626843774090208e-16, -2.02461120785182399295868e-14); + u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, -3.89796226062932799164047e-13, 6.948218305801794613277840e-12)); + u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, 1.150115825399960352669010e-10, -1.75724749952853179952664e-09)); + u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, -2.46113695010446974953590e-08, 3.133616889668683928784220e-07)); + u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, 3.590860448590527540050620e-06, -3.65762041821615519203610e-05)); + u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, -0.000325991886927389905997954, 0.0024903945701927185027435600)); + x = ddadd2_vd2_vd_vd2(vmul_vd_vd_vd(u, s), + vsel_vd2_vo_d_d_d_d(o, 0.0158543442438155018914259, -1.04693272280631521908845e-18, + -0.0807455121882807852484731, 3.61852475067037104849987e-18)); + x = ddadd2_vd2_vd2_vd2(ddmul_vd2_vd2_vd2(s2, x), + vsel_vd2_vo_d_d_d_d(o, -0.308425137534042437259529, -1.95698492133633550338345e-17, + 0.785398163397448278999491, 3.06287113727155002607105e-17)); + + x = ddmul_vd2_vd2_vd2(x, vsel_vd2_vo_vd2_vd2(o, s2, vcast_vd2_vd_vd(t, vcast_vd_d(0)))); + x = vsel_vd2_vo_vd2_vd2(o, ddadd2_vd2_vd2_vd(x, vcast_vd_d(1)), x); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(4)), vcast_vi_i(4))); + x = vd2setx_vd2_vd2_vd(x, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2getx_vd_vd2(x))))); + x = vd2sety_vd2_vd2_vd(x, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2gety_vd_vd2(x))))); + + return x; +} + +EXPORT CONST VECTOR_CC vdouble xsinpi_u05(vdouble d) { + vdouble2 x = sinpik(d); + vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); + + r = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), r); + r = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX3/4)), 
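+  /* sinpik/cospik share one kernel: q = trunc(4d) is rounded away from
+     zero to an even integer, its bit 1 selects per lane between the
+     sine and cosine coefficient sets (the vsel_vd_vo_d_d chain), and a
+     higher bit drives the final sign flip, so sin(pi x) and cos(pi x)
+     each cost one polynomial pass. */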
vreinterpret_vm_vd(r))); + r = vreinterpret_vd_vm(vor_vm_vo64_vm(visinf_vo_vd(d), vreinterpret_vm_vd(r))); + + return r; +} + +static INLINE CONST VECTOR_CC vdouble2 cospik(vdouble d) { + vopmask o; + vdouble u, s, t; + vdouble2 x, s2; + + u = vmul_vd_vd_vd(d, vcast_vd_d(4.0)); + vint q = vtruncate_vi_vd(u); + q = vand_vi_vi_vi(vadd_vi_vi_vi(q, vxor_vi_vi_vi(vsrl_vi_vi_i(q, 31), vcast_vi_i(1))), vcast_vi_i(~1)); + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(0))); + + s = vsub_vd_vd_vd(u, vcast_vd_vi(q)); + t = s; + s = vmul_vd_vd_vd(s, s); + s2 = ddmul_vd2_vd_vd(t, t); + + // + + u = vsel_vd_vo_d_d(o, 9.94480387626843774090208e-16, -2.02461120785182399295868e-14); + u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, -3.89796226062932799164047e-13, 6.948218305801794613277840e-12)); + u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, 1.150115825399960352669010e-10, -1.75724749952853179952664e-09)); + u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, -2.46113695010446974953590e-08, 3.133616889668683928784220e-07)); + u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, 3.590860448590527540050620e-06, -3.65762041821615519203610e-05)); + u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, -0.000325991886927389905997954, 0.0024903945701927185027435600)); + x = ddadd2_vd2_vd_vd2(vmul_vd_vd_vd(u, s), + vsel_vd2_vo_d_d_d_d(o, 0.0158543442438155018914259, -1.04693272280631521908845e-18, + -0.0807455121882807852484731, 3.61852475067037104849987e-18)); + x = ddadd2_vd2_vd2_vd2(ddmul_vd2_vd2_vd2(s2, x), + vsel_vd2_vo_d_d_d_d(o, -0.308425137534042437259529, -1.95698492133633550338345e-17, + 0.785398163397448278999491, 3.06287113727155002607105e-17)); + + x = ddmul_vd2_vd2_vd2(x, vsel_vd2_vo_vd2_vd2(o, s2, vcast_vd2_vd_vd(t, vcast_vd_d(0)))); + x = vsel_vd2_vo_vd2_vd2(o, ddadd2_vd2_vd2_vd(x, vcast_vd_d(1)), x); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(4)), vcast_vi_i(4))); + x = vd2setx_vd2_vd2_vd(x, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2getx_vd_vd2(x))))); + x = vd2sety_vd2_vd2_vd(x, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2gety_vd_vd2(x))))); + + return x; +} + +EXPORT CONST VECTOR_CC vdouble xcospi_u05(vdouble d) { + vdouble2 x = cospik(d); + vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); + + r = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX3/4)), vcast_vd_d(1), r); + r = vreinterpret_vd_vm(vor_vm_vo64_vm(visinf_vo_vd(d), vreinterpret_vm_vd(r))); + + return r; +} + +EXPORT CONST VECTOR_CC vdouble xtan(vdouble d) { +#if !defined(DETERMINISTIC) + vdouble u, s, x, y; + vopmask o; + vint ql; + + if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { + vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI))); + ql = vrint_vi_vd(dql); + x = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2 * 0.5), d); + x = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2 * 0.5), x); + } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1e+6))))) { + vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24)))); + dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); + vdouble dql = vrint_vd_vd(vsub_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI)), dqh)); + ql = vrint_vi_vd(dql); + + x = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); + x = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A * 0.5), x); + x = vmla_vd_vd_vd_vd(dqh, 
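+  /* xtan evaluates tan through the half angle: after reduction the
+     code forms t ~ tan(x/2) with one polynomial, then uses
+     tan(x) = 2t / (1 - t^2), computed below as -2t and t^2 - 1 with a
+     numerator/denominator swap when bit 0 of the quadrant is set,
+     since tan(x + pi/2) = -1/tan(x). */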
vcast_vd_d(-PI_B * 0.5), x); + x = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B * 0.5), x); + x = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C * 0.5), x); + x = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C * 0.5), x); + x = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D * 0.5), x); + } else { + ddi_t ddi = rempi(d); + ql = ddigeti_vi_ddi(ddi); + x = vadd_vd_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vd2gety_vd_vd2(ddigetdd_vd2_ddi(ddi))); + x = vreinterpret_vd_vm(vor_vm_vo64_vm(visinf_vo_vd(d), vreinterpret_vm_vd(x))); + x = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(x))); + } + + x = vmul_vd_vd_vd(x, vcast_vd_d(0.5)); + s = vmul_vd_vd_vd(x, x); + + vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2); + u = POLY8(s, s2, s4, + +0.3245098826639276316e-3, + +0.5619219738114323735e-3, + +0.1460781502402784494e-2, + +0.3591611540792499519e-2, + +0.8863268409563113126e-2, + +0.2186948728185535498e-1, + +0.5396825399517272970e-1, + +0.1333333333330500581e+0); + + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.3333333333333343695e+0)); + u = vmla_vd_vd_vd_vd(s, vmul_vd_vd_vd(u, x), x); + + y = vmla_vd_vd_vd_vd(u, u, vcast_vd_d(-1)); + x = vmul_vd_vd_vd(u, vcast_vd_d(-2)); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))); + u = vdiv_vd_vd_vd(vsel_vd_vo_vd_vd(o, vneg_vd_vd(y), x), + vsel_vd_vo_vd_vd(o, x, y)); + u = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), d, u); + + return u; + +#else // #if !defined(DETERMINISTIC) + + vdouble u, s, x, y; + vopmask o; + vint ql; + + vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI))); + ql = vrint_vi_vd(dql); + s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2 * 0.5), d); + s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2 * 0.5), s); + vopmask g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2)); + + if (!LIKELY(vtestallones_i_vo64(g))) { + vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24)))); + dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); + vdouble dql = vrint_vd_vd(vsub_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI)), dqh)); + + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A * 0.5), u); + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B * 0.5), u); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B * 0.5), u); + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C * 0.5), u); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C * 0.5), u); + u = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D * 0.5), u); + + ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, vrint_vi_vd(dql)); + s = vsel_vd_vo_vd_vd(g, s, u); + g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1e+6)); + + if (!LIKELY(vtestallones_i_vo64(g))) { + ddi_t ddi = rempi(d); + vint ql2 = ddigeti_vi_ddi(ddi); + u = vadd_vd_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vd2gety_vd_vd2(ddigetdd_vd2_ddi(ddi))); + u = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(u))); + + ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ql2); + s = vsel_vd_vo_vd_vd(g, s, u); + } + } + + x = vmul_vd_vd_vd(s, vcast_vd_d(0.5)); + s = vmul_vd_vd_vd(x, x); + + vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2); + u = POLY8(s, s2, s4, + +0.3245098826639276316e-3, + +0.5619219738114323735e-3, + +0.1460781502402784494e-2, + +0.3591611540792499519e-2, + +0.8863268409563113126e-2, + +0.2186948728185535498e-1, + +0.5396825399517272970e-1, + +0.1333333333330500581e+0); + + u = vmla_vd_vd_vd_vd(u, s, 
vcast_vd_d(+0.3333333333333343695e+0)); + u = vmla_vd_vd_vd_vd(s, vmul_vd_vd_vd(u, x), x); + + y = vmla_vd_vd_vd_vd(u, u, vcast_vd_d(-1)); + x = vmul_vd_vd_vd(u, vcast_vd_d(-2)); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))); + u = vdiv_vd_vd_vd(vsel_vd_vo_vd_vd(o, vneg_vd_vd(y), x), + vsel_vd_vo_vd_vd(o, x, y)); + u = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), d, u); + + return u; +#endif // #if !defined(DETERMINISTIC) +} + +EXPORT CONST VECTOR_CC vdouble xtan_u1(vdouble d) { +#if !defined(DETERMINISTIC) + vdouble u; + vdouble2 s, t, x, y; + vopmask o; + vint ql; + + if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { + vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI))); + ql = vrint_vi_vd(dql); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2*0.5), d); + s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2*0.5))); + } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) { + vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24)))); + dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); + s = ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd(vcast_vd2_d_d(M_2_PI_H, M_2_PI_L), d), + vsub_vd_vd_vd(vsel_vd_vo_vd_vd(vlt_vo_vd_vd(d, vcast_vd_d(0)), + vcast_vd_d(-0.5), vcast_vd_d(0.5)), dqh)); + const vdouble dql = vtruncate_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(s), vd2gety_vd_vd2(s))); + ql = vrint_vi_vd(dql); + + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); + s = ddadd_vd2_vd_vd(u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A*0.5 ))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B*0.5))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B*0.5 ))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C*0.5))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C*0.5 ))); + s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D*0.5))); + } else { + ddi_t ddi = rempi(d); + ql = ddigeti_vi_ddi(ddi); + s = ddigetdd_vd2_ddi(ddi); + o = vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)); + s = vd2setx_vd2_vd2_vd(s, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2getx_vd_vd2(s))))); + s = vd2sety_vd2_vd2_vd(s, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2gety_vd_vd2(s))))); + } + + t = ddscale_vd2_vd2_vd(s, vcast_vd_d(0.5)); + s = ddsqu_vd2_vd2(t); + + vdouble s2 = vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(s)), s4 = vmul_vd_vd_vd(s2, s2); + u = POLY8(vd2getx_vd_vd2(s), s2, s4, + +0.3245098826639276316e-3, + +0.5619219738114323735e-3, + +0.1460781502402784494e-2, + +0.3591611540792499519e-2, + +0.8863268409563113126e-2, + +0.2186948728185535498e-1, + +0.5396825399517272970e-1, + +0.1333333333330500581e+0); + + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(+0.3333333333333343695e+0)); + x = ddadd_vd2_vd2_vd2(t, ddmul_vd2_vd2_vd(ddmul_vd2_vd2_vd2(s, t), u)); + + y = ddadd_vd2_vd_vd2(vcast_vd_d(-1), ddsqu_vd2_vd2(x)); + x = ddscale_vd2_vd2_vd(x, vcast_vd_d(-2)); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))); + + x = dddiv_vd2_vd2_vd2(vsel_vd2_vo_vd2_vd2(o, ddneg_vd2_vd2(y), x), + vsel_vd2_vo_vd2_vd2(o, x, y)); + + u = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); + + u = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), d, u); + + return u; + +#else // #if !defined(DETERMINISTIC) + + vdouble u; + vdouble2 s, t, x, y; + vopmask o; + vint ql; + + const vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * 
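+  /* Deterministic xtan_u1: same blend-in structure as the functions
+     above; note that the mid-range tier derives the quadrant from a
+     double-double 2/pi (M_2_PI_H + M_2_PI_L) product, so the quotient
+     itself stays accurate enough for the 1 ulp bound. */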
M_1_PI))); + ql = vrint_vi_vd(dql); + u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2*0.5), d); + s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2*0.5))); + vopmask g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2)); + + if (!LIKELY(vtestallones_i_vo64(g))) { + vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24)))); + dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); + x = ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd(vcast_vd2_d_d(M_2_PI_H, M_2_PI_L), d), + vsub_vd_vd_vd(vsel_vd_vo_vd_vd(vlt_vo_vd_vd(d, vcast_vd_d(0)), + vcast_vd_d(-0.5), vcast_vd_d(0.5)), dqh)); + const vdouble dql = vtruncate_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x))); + + u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); + x = ddadd_vd2_vd_vd(u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A*0.5 ))); + x = ddadd2_vd2_vd2_vd(x, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B*0.5))); + x = ddadd2_vd2_vd2_vd(x, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B*0.5 ))); + x = ddadd2_vd2_vd2_vd(x, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C*0.5))); + x = ddadd2_vd2_vd2_vd(x, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C*0.5 ))); + x = ddadd_vd2_vd2_vd(x, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D*0.5))); + + ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, vrint_vi_vd(dql)); + s = vsel_vd2_vo_vd2_vd2(g, s, x); + g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX)); + + if (!LIKELY(vtestallones_i_vo64(g))) { + ddi_t ddi = rempi(d); + x = ddigetdd_vd2_ddi(ddi); + o = vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)); + x = vd2setx_vd2_vd2_vd(x, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2getx_vd_vd2(x))))); + x = vd2sety_vd2_vd2_vd(x, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2gety_vd_vd2(x))))); + + ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ddigeti_vi_ddi(ddi)); + s = vsel_vd2_vo_vd2_vd2(g, s, x); + } + } + + t = ddscale_vd2_vd2_vd(s, vcast_vd_d(0.5)); + s = ddsqu_vd2_vd2(t); + + vdouble s2 = vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(s)), s4 = vmul_vd_vd_vd(s2, s2); + u = POLY8(vd2getx_vd_vd2(s), s2, s4, + +0.3245098826639276316e-3, + +0.5619219738114323735e-3, + +0.1460781502402784494e-2, + +0.3591611540792499519e-2, + +0.8863268409563113126e-2, + +0.2186948728185535498e-1, + +0.5396825399517272970e-1, + +0.1333333333330500581e+0); + + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(+0.3333333333333343695e+0)); + x = ddadd_vd2_vd2_vd2(t, ddmul_vd2_vd2_vd(ddmul_vd2_vd2_vd2(s, t), u)); + + y = ddadd_vd2_vd_vd2(vcast_vd_d(-1), ddsqu_vd2_vd2(x)); + x = ddscale_vd2_vd2_vd(x, vcast_vd_d(-2)); + + o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))); + + x = dddiv_vd2_vd2_vd2(vsel_vd2_vo_vd2_vd2(o, ddneg_vd2_vd2(y), x), + vsel_vd2_vo_vd2_vd2(o, x, y)); + + u = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); + + u = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), d, u); + + return u; +#endif // #if !defined(DETERMINISTIC) +} + +static INLINE CONST VECTOR_CC vdouble atan2k(vdouble y, vdouble x) { + vdouble s, t, u; + vint q; + vopmask p; + + q = vsel_vi_vd_vi(x, vcast_vi_i(-2)); + x = vabs_vd_vd(x); + + q = vsel_vi_vd_vd_vi_vi(x, y, vadd_vi_vi_vi(q, vcast_vi_i(1)), q); + p = vlt_vo_vd_vd(x, y); + s = vsel_vd_vo_vd_vd(p, vneg_vd_vd(x), y); + t = vmax_vd_vd_vd(x, y); + + s = vdiv_vd_vd_vd(s, t); + t = vmul_vd_vd_vd(s, s); + + vdouble t2 = vmul_vd_vd_vd(t, t), t4 = vmul_vd_vd_vd(t2, t2), t8 = vmul_vd_vd_vd(t4, t4), t16 = vmul_vd_vd_vd(t8, t8); + u = POLY19(t, t2, t4, t8, t16, + -1.88796008463073496563746e-05, + 0.000209850076645816976906797, + 
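+  /* atan2k folds (y, x) into the first octant: q takes -2 when x < 0
+     and one more when x < y (the arguments are swapped so the ratio
+     s/t stays in [-1, 1]), the 19-term minimax polynomial in t = s^2
+     is evaluated with the Estrin-style POLY19 macro, and q * pi/2 is
+     added back at the end. */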
-0.00110611831486672482563471, + 0.00370026744188713119232403, + -0.00889896195887655491740809, + 0.016599329773529201970117, + -0.0254517624932312641616861, + 0.0337852580001353069993897, + -0.0407629191276836500001934, + 0.0466667150077840625632675, + -0.0523674852303482457616113, + 0.0587666392926673580854313, + -0.0666573579361080525984562, + 0.0769219538311769618355029, + -0.090908995008245008229153, + 0.111111105648261418443745, + -0.14285714266771329383765, + 0.199999999996591265594148, + -0.333333333333311110369124); + + t = vmla_vd_vd_vd_vd(s, vmul_vd_vd_vd(t, u), s); + t = vmla_vd_vd_vd_vd(vcast_vd_vi(q), vcast_vd_d(M_PI/2), t); + + return t; +} + +static INLINE CONST VECTOR_CC vdouble2 atan2k_u1(vdouble2 y, vdouble2 x) { + vdouble u; + vdouble2 s, t; + vint q; + vopmask p; + + q = vsel_vi_vd_vi(vd2getx_vd_vd2(x), vcast_vi_i(-2)); + p = vlt_vo_vd_vd(vd2getx_vd_vd2(x), vcast_vd_d(0)); + vmask b = vand_vm_vo64_vm(p, vreinterpret_vm_vd(vcast_vd_d(-0.0))); + x = vd2setx_vd2_vd2_vd(x, vreinterpret_vd_vm(vxor_vm_vm_vm(b, vreinterpret_vm_vd(vd2getx_vd_vd2(x))))); + x = vd2sety_vd2_vd2_vd(x, vreinterpret_vd_vm(vxor_vm_vm_vm(b, vreinterpret_vm_vd(vd2gety_vd_vd2(x))))); + + q = vsel_vi_vd_vd_vi_vi(vd2getx_vd_vd2(x), vd2getx_vd_vd2(y), vadd_vi_vi_vi(q, vcast_vi_i(1)), q); + p = vlt_vo_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(y)); + s = vsel_vd2_vo_vd2_vd2(p, ddneg_vd2_vd2(x), y); + t = vsel_vd2_vo_vd2_vd2(p, y, x); + + s = dddiv_vd2_vd2_vd2(s, t); + t = ddsqu_vd2_vd2(s); + t = ddnormalize_vd2_vd2(t); + + vdouble t2 = vmul_vd_vd_vd(vd2getx_vd_vd2(t), vd2getx_vd_vd2(t)), t4 = vmul_vd_vd_vd(t2, t2), t8 = vmul_vd_vd_vd(t4, t4), t16 = vmul_vd_vd_vd(t8, t8); + u = POLY16(vd2getx_vd_vd2(t), t2, t4, t8, + 1.06298484191448746607415e-05, + -0.000125620649967286867384336, + 0.00070557664296393412389774, + -0.00251865614498713360352999, + 0.00646262899036991172313504, + -0.0128281333663399031014274, + 0.0208024799924145797902497, + -0.0289002344784740315686289, + 0.0359785005035104590853656, + -0.041848579703592507506027, + 0.0470843011653283988193763, + -0.0524914210588448421068719, + 0.0587946590969581003860434, + -0.0666620884778795497194182, + 0.0769225330296203768654095, + -0.0909090442773387574781907); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(t), vcast_vd_d(0.111111108376896236538123)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(t), vcast_vd_d(-0.142857142756268568062339)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(t), vcast_vd_d(0.199999999997977351284817)); + u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(t), vcast_vd_d(-0.333333333333317605173818)); + + t = ddadd_vd2_vd2_vd2(s, ddmul_vd2_vd2_vd(ddmul_vd2_vd2_vd2(s, t), u)); + + t = ddadd_vd2_vd2_vd2(ddmul_vd2_vd2_vd(vcast_vd2_d_d(1.570796326794896557998982, 6.12323399573676603586882e-17), vcast_vd_vi(q)), t); + + return t; +} + +static INLINE CONST VECTOR_CC vdouble visinf2_vd_vd_vd(vdouble d, vdouble m) { + return vreinterpret_vd_vm(vand_vm_vo64_vm(visinf_vo_vd(d), vor_vm_vm_vm(vand_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(m)))); +} + +EXPORT CONST VECTOR_CC vdouble xatan2(vdouble y, vdouble x) { + vdouble r = atan2k(vabs_vd_vd(y), x); + + r = vmulsign_vd_vd_vd(r, x); + r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(x), veq_vo_vd_vd(x, vcast_vd_d(0))), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), visinf2_vd_vd_vd(x, vmulsign_vd_vd_vd(vcast_vd_d(M_PI/2), x))), r); + r = vsel_vd_vo_vd_vd(visinf_vo_vd(y), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), visinf2_vd_vd_vd(x, vmulsign_vd_vd_vd(vcast_vd_d(M_PI/4), x))), r); + r = 
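+  /* xatan2 special cases, applied after the polynomial kernel:
+     x = +inf gives 0, x = -inf gives pi, x = 0 gives pi/2 (all via
+     visinf2, which ORs the sign of an infinite x into the constant);
+     y = +-inf gives pi/4, pi/2 or 3pi/4 depending on x; y = 0 gives pi
+     or 0 by the sign bit of x; NaN in either input propagates, and the
+     result finally takes the sign of y. */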
vsel_vd_vo_vd_vd(veq_vo_vd_vd(y, vcast_vd_d(0.0)), vreinterpret_vd_vm(vand_vm_vo64_vm(vsignbit_vo_vd(x), vreinterpret_vm_vd(vcast_vd_d(M_PI)))), r); + + r = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vreinterpret_vm_vd(vmulsign_vd_vd_vd(r, y)))); + return r; +} + +EXPORT CONST VECTOR_CC vdouble xatan2_u1(vdouble y, vdouble x) { + vopmask o = vlt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(5.5626846462680083984e-309)); // nexttoward((1.0 / DBL_MAX), 1) + x = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(x, vcast_vd_d(UINT64_C(1) << 53)), x); + y = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(y, vcast_vd_d(UINT64_C(1) << 53)), y); + + vdouble2 d = atan2k_u1(vcast_vd2_vd_vd(vabs_vd_vd(y), vcast_vd_d(0)), vcast_vd2_vd_vd(x, vcast_vd_d(0))); + vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)); + + r = vmulsign_vd_vd_vd(r, x); + r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(x), veq_vo_vd_vd(x, vcast_vd_d(0))), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), visinf2_vd_vd_vd(x, vmulsign_vd_vd_vd(vcast_vd_d(M_PI/2), x))), r); + r = vsel_vd_vo_vd_vd(visinf_vo_vd(y), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), visinf2_vd_vd_vd(x, vmulsign_vd_vd_vd(vcast_vd_d(M_PI/4), x))), r); + r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(y, vcast_vd_d(0.0)), vreinterpret_vd_vm(vand_vm_vo64_vm(vsignbit_vo_vd(x), vreinterpret_vm_vd(vcast_vd_d(M_PI)))), r); + + r = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vreinterpret_vm_vd(vmulsign_vd_vd_vd(r, y)))); + return r; +} + +EXPORT CONST VECTOR_CC vdouble xasin(vdouble d) { + vopmask o = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(0.5)); + vdouble x2 = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, d), vmul_vd_vd_vd(vsub_vd_vd_vd(vcast_vd_d(1), vabs_vd_vd(d)), vcast_vd_d(0.5))); + vdouble x = vsel_vd_vo_vd_vd(o, vabs_vd_vd(d), vsqrt_vd_vd(x2)), u; + + vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4), x16 = vmul_vd_vd_vd(x8, x8); + u = POLY12(x2, x4, x8, x16, + +0.3161587650653934628e-1, + -0.1581918243329996643e-1, + +0.1929045477267910674e-1, + +0.6606077476277170610e-2, + +0.1215360525577377331e-1, + +0.1388715184501609218e-1, + +0.1735956991223614604e-1, + +0.2237176181932048341e-1, + +0.3038195928038132237e-1, + +0.4464285681377102438e-1, + +0.7500000000378581611e-1, + +0.1666666666666497543e+0); + + u = vmla_vd_vd_vd_vd(u, vmul_vd_vd_vd(x, x2), x); + + vdouble r = vsel_vd_vo_vd_vd(o, u, vmla_vd_vd_vd_vd(u, vcast_vd_d(-2), vcast_vd_d(M_PI/2))); + return vmulsign_vd_vd_vd(r, d); +} + +EXPORT CONST VECTOR_CC vdouble xasin_u1(vdouble d) { + vopmask o = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(0.5)); + vdouble x2 = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, d), vmul_vd_vd_vd(vsub_vd_vd_vd(vcast_vd_d(1), vabs_vd_vd(d)), vcast_vd_d(0.5))), u; + vdouble2 x = vsel_vd2_vo_vd2_vd2(o, vcast_vd2_vd_vd(vabs_vd_vd(d), vcast_vd_d(0)), ddsqrt_vd2_vd(x2)); + x = vsel_vd2_vo_vd2_vd2(veq_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1.0)), vcast_vd2_d_d(0, 0), x); + + vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4), x16 = vmul_vd_vd_vd(x8, x8); + u = POLY12(x2, x4, x8, x16, + +0.3161587650653934628e-1, + -0.1581918243329996643e-1, + +0.1929045477267910674e-1, + +0.6606077476277170610e-2, + +0.1215360525577377331e-1, + +0.1388715184501609218e-1, + +0.1735956991223614604e-1, + +0.2237176181932048341e-1, + +0.3038195928038132237e-1, + +0.4464285681377102438e-1, + +0.7500000000378581611e-1, + +0.1666666666666497543e+0); + + u = vmul_vd_vd_vd(u, vmul_vd_vd_vd(x2, vd2getx_vd_vd2(x))); + + vdouble2 y = 
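+  /* asin uses the polynomial in d^2 directly for |d| < 0.5 and
+     otherwise the identity asin(d) = pi/2 - 2 asin(sqrt((1 - |d|)/2));
+     in this u1 version pi/4 is carried as a double-double and the
+     result is doubled at the end, which keeps the cancellation near
+     |d| = 1 under control. */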
ddsub_vd2_vd2_vd(ddsub_vd2_vd2_vd2(vcast_vd2_d_d(3.141592653589793116/4, 1.2246467991473532072e-16/4), x), u); + + vdouble r = vsel_vd_vo_vd_vd(o, vadd_vd_vd_vd(u, vd2getx_vd_vd2(x)), + vmul_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(y), vd2gety_vd_vd2(y)), vcast_vd_d(2))); + return vmulsign_vd_vd_vd(r, d); +} + +EXPORT CONST VECTOR_CC vdouble xacos(vdouble d) { + vopmask o = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(0.5)); + vdouble x2 = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, d), + vmul_vd_vd_vd(vsub_vd_vd_vd(vcast_vd_d(1), vabs_vd_vd(d)), vcast_vd_d(0.5))), u; + vdouble x = vsel_vd_vo_vd_vd(o, vabs_vd_vd(d), vsqrt_vd_vd(x2)); + x = vsel_vd_vo_vd_vd(veq_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1.0)), vcast_vd_d(0), x); + + vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4), x16 = vmul_vd_vd_vd(x8, x8); + u = POLY12(x2, x4, x8, x16, + +0.3161587650653934628e-1, + -0.1581918243329996643e-1, + +0.1929045477267910674e-1, + +0.6606077476277170610e-2, + +0.1215360525577377331e-1, + +0.1388715184501609218e-1, + +0.1735956991223614604e-1, + +0.2237176181932048341e-1, + +0.3038195928038132237e-1, + +0.4464285681377102438e-1, + +0.7500000000378581611e-1, + +0.1666666666666497543e+0); + + u = vmul_vd_vd_vd(u, vmul_vd_vd_vd(x2, x)); + + vdouble y = vsub_vd_vd_vd(vcast_vd_d(M_PI/2), vadd_vd_vd_vd(vmulsign_vd_vd_vd(x, d), vmulsign_vd_vd_vd(u, d))); + x = vadd_vd_vd_vd(x, u); + vdouble r = vsel_vd_vo_vd_vd(o, y, vmul_vd_vd_vd(x, vcast_vd_d(2))); + return vsel_vd_vo_vd_vd(vandnot_vo_vo_vo(o, vlt_vo_vd_vd(d, vcast_vd_d(0))), + vd2getx_vd_vd2(ddadd_vd2_vd2_vd(vcast_vd2_d_d(3.141592653589793116, 1.2246467991473532072e-16), + vneg_vd_vd(r))), r); +} + +EXPORT CONST VECTOR_CC vdouble xacos_u1(vdouble d) { + vopmask o = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(0.5)); + vdouble x2 = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, d), vmul_vd_vd_vd(vsub_vd_vd_vd(vcast_vd_d(1), vabs_vd_vd(d)), vcast_vd_d(0.5))), u; + vdouble2 x = vsel_vd2_vo_vd2_vd2(o, vcast_vd2_vd_vd(vabs_vd_vd(d), vcast_vd_d(0)), ddsqrt_vd2_vd(x2)); + x = vsel_vd2_vo_vd2_vd2(veq_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1.0)), vcast_vd2_d_d(0, 0), x); + + vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4), x16 = vmul_vd_vd_vd(x8, x8); + u = POLY12(x2, x4, x8, x16, + +0.3161587650653934628e-1, + -0.1581918243329996643e-1, + +0.1929045477267910674e-1, + +0.6606077476277170610e-2, + +0.1215360525577377331e-1, + +0.1388715184501609218e-1, + +0.1735956991223614604e-1, + +0.2237176181932048341e-1, + +0.3038195928038132237e-1, + +0.4464285681377102438e-1, + +0.7500000000378581611e-1, + +0.1666666666666497543e+0); + + u = vmul_vd_vd_vd(u, vmul_vd_vd_vd(x2, vd2getx_vd_vd2(x))); + + vdouble2 y = ddsub_vd2_vd2_vd2(vcast_vd2_d_d(3.141592653589793116/2, 1.2246467991473532072e-16/2), + ddadd_vd2_vd_vd(vmulsign_vd_vd_vd(vd2getx_vd_vd2(x), d), vmulsign_vd_vd_vd(u, d))); + x = ddadd_vd2_vd2_vd(x, u); + + y = vsel_vd2_vo_vd2_vd2(o, y, ddscale_vd2_vd2_vd(x, vcast_vd_d(2))); + + y = vsel_vd2_vo_vd2_vd2(vandnot_vo_vo_vo(o, vlt_vo_vd_vd(d, vcast_vd_d(0))), + ddsub_vd2_vd2_vd2(vcast_vd2_d_d(3.141592653589793116, 1.2246467991473532072e-16), y), y); + + return vadd_vd_vd_vd(vd2getx_vd_vd2(y), vd2gety_vd_vd2(y)); +} + +EXPORT CONST VECTOR_CC vdouble xatan_u1(vdouble d) { + vdouble2 d2 = atan2k_u1(vcast_vd2_vd_vd(vabs_vd_vd(d), vcast_vd_d(0)), vcast_vd2_d_d(1, 0)); + vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(d2), vd2gety_vd_vd2(d2)); + r = vsel_vd_vo_vd_vd(visinf_vo_vd(d), vcast_vd_d(1.570796326794896557998982), r); + return vmulsign_vd_vd_vd(r, d); +} + +EXPORT CONST VECTOR_CC 
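+  /* Fast (~3.5 ulp) atan: q records the sign of the input in bit 1
+     and, in bit 0, whether s was replaced by 1/s (for s > 1,
+     atan(s) = pi/2 - atan(1/s)); both transformations are undone after
+     the POLY19 evaluation.  The __INTEL_COMPILER branch only preserves
+     the sign of a zero input. */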
vdouble xatan(vdouble s) { + vdouble t, u; + vint q; +#if defined(__INTEL_COMPILER) && defined(ENABLE_PURECFMA_SCALAR) + vdouble w = s; +#endif + + q = vsel_vi_vd_vi(s, vcast_vi_i(2)); + s = vabs_vd_vd(s); + + q = vsel_vi_vd_vd_vi_vi(vcast_vd_d(1), s, vadd_vi_vi_vi(q, vcast_vi_i(1)), q); + s = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(vcast_vd_d(1), s), vrec_vd_vd(s), s); + + t = vmul_vd_vd_vd(s, s); + + vdouble t2 = vmul_vd_vd_vd(t, t), t4 = vmul_vd_vd_vd(t2, t2), t8 = vmul_vd_vd_vd(t4, t4), t16 = vmul_vd_vd_vd(t8, t8); + u = POLY19(t, t2, t4, t8, t16, + -1.88796008463073496563746e-05, + 0.000209850076645816976906797, + -0.00110611831486672482563471, + 0.00370026744188713119232403, + -0.00889896195887655491740809, + 0.016599329773529201970117, + -0.0254517624932312641616861, + 0.0337852580001353069993897, + -0.0407629191276836500001934, + 0.0466667150077840625632675, + -0.0523674852303482457616113, + 0.0587666392926673580854313, + -0.0666573579361080525984562, + 0.0769219538311769618355029, + -0.090908995008245008229153, + 0.111111105648261418443745, + -0.14285714266771329383765, + 0.199999999996591265594148, + -0.333333333333311110369124); + + t = vmla_vd_vd_vd_vd(s, vmul_vd_vd_vd(t, u), s); + + t = vsel_vd_vo_vd_vd(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(1)), vcast_vi_i(1))), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), t), t); + t = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(2))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(t))); + +#if defined(__INTEL_COMPILER) && defined(ENABLE_PURECFMA_SCALAR) + t = vsel_vd_vo_vd_vd(veq_vo_vd_vd(w, vcast_vd_d(0)), w, t); +#endif + + return t; +} + +#if !defined(DETERMINISTIC) +EXPORT CONST VECTOR_CC vdouble xlog(vdouble d) { + vdouble x, x2; + vdouble t, m; + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN)); + d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d((double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32))), d); + vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); + m = vldexp3_vd_vd_vi(d, vneg_vi_vi(e)); + e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e); +#else + vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); + e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e); + m = vgetmant_vd_vd(d); +#endif + + x = vdiv_vd_vd_vd(vsub_vd_vd_vd(m, vcast_vd_d(1)), vadd_vd_vd_vd(vcast_vd_d(1), m)); + x2 = vmul_vd_vd_vd(x, x); + + vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4), x3 = vmul_vd_vd_vd(x, x2); + t = POLY7(x2, x4, x8, + 0.153487338491425068243146, + 0.152519917006351951593857, + 0.181863266251982985677316, + 0.222221366518767365905163, + 0.285714294746548025383248, + 0.399999999950799600689777, + 0.6666666666667778740063); + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + x = vmla_vd_vd_vd_vd(x, vcast_vd_d(2), vmul_vd_vd_vd(vcast_vd_d(0.693147180559945286226764), vcast_vd_vi(e))); + x = vmla_vd_vd_vd_vd(x3, t, x); + + x = vsel_vd_vo_vd_vd(vispinf_vo_vd(d), vcast_vd_d(SLEEF_INFINITY), x); + x = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(0)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), x); + x = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-SLEEF_INFINITY), x); +#else + x = vmla_vd_vd_vd_vd(x, vcast_vd_d(2), vmul_vd_vd_vd(vcast_vd_d(0.693147180559945286226764), e)); + x = vmla_vd_vd_vd_vd(x3, t, x); + + x = vfixup_vd_vd_vd_vi2_i(x, d, vcast_vi2_i((5 << (5*4))), 0); 
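+  /* On AVX-512 the log special cases (log(0) = -inf, log(x < 0) = NaN,
+     log(+inf) = +inf) are folded into a single vfixup with a per-class
+     4-bit action table, replacing the three vsel lines of the generic
+     path above. */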
+#endif + + return x; +} +#endif // #if !defined(DETERMINISTIC) + +EXPORT CONST VECTOR_CC vdouble xexp(vdouble d) { + vdouble u = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(R_LN2))), s; + vint q = vrint_vi_vd(u); + + s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L2U), d); + s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L2L), s); + +#ifdef ENABLE_FMA_DP + vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2), s8 = vmul_vd_vd_vd(s4, s4); + u = POLY10(s, s2, s4, s8, + +0.2081276378237164457e-8, + +0.2511210703042288022e-7, + +0.2755762628169491192e-6, + +0.2755723402025388239e-5, + +0.2480158687479686264e-4, + +0.1984126989855865850e-3, + +0.1388888888914497797e-2, + +0.8333333333314938210e-2, + +0.4166666666666602598e-1, + +0.1666666666666669072e+0); + u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.5000000000000000000e+0)); + u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1000000000000000000e+1)); + u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1000000000000000000e+1)); +#else // #ifdef ENABLE_FMA_DP + vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2), s8 = vmul_vd_vd_vd(s4, s4); + u = POLY10(s, s2, s4, s8, + 2.08860621107283687536341e-09, + 2.51112930892876518610661e-08, + 2.75573911234900471893338e-07, + 2.75572362911928827629423e-06, + 2.4801587159235472998791e-05, + 0.000198412698960509205564975, + 0.00138888888889774492207962, + 0.00833333333331652721664984, + 0.0416666666666665047591422, + 0.166666666666666851703837); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.5000000000000000000e+0)); + + u = vadd_vd_vd_vd(vcast_vd_d(1), vmla_vd_vd_vd_vd(vmul_vd_vd_vd(s, s), u, s)); +#endif // #ifdef ENABLE_FMA_DP + + u = vldexp2_vd_vd_vi(u, q); + + u = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(d, vcast_vd_d(709.78271114955742909217217426)), vcast_vd_d(SLEEF_INFINITY), u); + u = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(d, vcast_vd_d(-1000)), vreinterpret_vm_vd(u))); + + return u; +} + +static INLINE CONST VECTOR_CC vdouble expm1k(vdouble d) { + vdouble u = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(R_LN2))), s; + vint q = vrint_vi_vd(u); + + s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L2U), d); + s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L2L), s); + + vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2), s8 = vmul_vd_vd_vd(s4, s4); + u = POLY10(s, s2, s4, s8, + 2.08860621107283687536341e-09, + 2.51112930892876518610661e-08, + 2.75573911234900471893338e-07, + 2.75572362911928827629423e-06, + 2.4801587159235472998791e-05, + 0.000198412698960509205564975, + 0.00138888888889774492207962, + 0.00833333333331652721664984, + 0.0416666666666665047591422, + 0.166666666666666851703837); + + u = vadd_vd_vd_vd(vmla_vd_vd_vd_vd(s2, vcast_vd_d(0.5), vmul_vd_vd_vd(vmul_vd_vd_vd(s2, s), u)), s); + + u = vsel_vd_vo_vd_vd(vcast_vo64_vo32(veq_vo_vi_vi(q, vcast_vi_i(0))), u, + vsub_vd_vd_vd(vldexp2_vd_vd_vi(vadd_vd_vd_vd(u, vcast_vd_d(1)), q), vcast_vd_d(1))); + + return u; +} + +static INLINE CONST VECTOR_CC vdouble2 logk(vdouble d) { + vdouble2 x, x2, s; + vdouble t, m; + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN)); + d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d((double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32))), d); + vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); + m = vldexp3_vd_vd_vi(d, vneg_vi_vi(e)); + e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e); +#else + vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); + e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e); + m = vgetmant_vd_vd(d); 
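+  /* logk: the double-double log kernel shared by pow and the inverse
+     hyperbolics.  The input is decomposed as d = m * 2^e with m
+     roughly in [0.75, 1.5) (ilogb2k/vldexp3 generically, getexp and
+     getmant on AVX-512; subnormals are pre-scaled by 2^64 and e
+     corrected by -64), and log m = 2 atanh((m - 1)/(m + 1)) is
+     evaluated as the odd series in x = (m - 1)/(m + 1), with the
+     leading terms kept in double-double (the vcast_vd2 constants are
+     the hi/lo splits of 2/3 and ln 2). */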
+#endif + + x = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd_vd(vcast_vd_d(-1), m), ddadd2_vd2_vd_vd(vcast_vd_d(1), m)); + x2 = ddsqu_vd2_vd2(x); + + vdouble x4 = vmul_vd_vd_vd(vd2getx_vd_vd2(x2), vd2getx_vd_vd2(x2)), x8 = vmul_vd_vd_vd(x4, x4), x16 = vmul_vd_vd_vd(x8, x8); + t = POLY9(vd2getx_vd_vd2(x2), x4, x8, x16, + 0.116255524079935043668677, + 0.103239680901072952701192, + 0.117754809412463995466069, + 0.13332981086846273921509, + 0.153846227114512262845736, + 0.181818180850050775676507, + 0.222222222230083560345903, + 0.285714285714249172087875, + 0.400000000000000077715612); + + vdouble2 c = vcast_vd2_d_d(0.666666666666666629659233, 3.80554962542412056336616e-17); +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), vcast_vd_vi(e)); +#else + s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), e); +#endif + s = ddadd_vd2_vd2_vd2(s, ddscale_vd2_vd2_vd(x, vcast_vd_d(2))); + x = ddmul_vd2_vd2_vd2(x2, x); + s = ddadd_vd2_vd2_vd2(s, ddmul_vd2_vd2_vd2(x, c)); + x = ddmul_vd2_vd2_vd2(x2, x); + s = ddadd_vd2_vd2_vd2(s, ddmul_vd2_vd2_vd(x, t)); + + return s; +} + +#if !defined(DETERMINISTIC) +EXPORT CONST VECTOR_CC vdouble xlog_u1(vdouble d) { + vdouble2 x; + vdouble t, m, x2; + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN)); + d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d((double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32))), d); + vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); + m = vldexp3_vd_vd_vi(d, vneg_vi_vi(e)); + e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e); +#else + vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); + e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e); + m = vgetmant_vd_vd(d); +#endif + + x = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd_vd(vcast_vd_d(-1), m), ddadd2_vd2_vd_vd(vcast_vd_d(1), m)); + x2 = vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(x)); + + vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4); + t = POLY7(x2, x4, x8, + 0.1532076988502701353e+0, + 0.1525629051003428716e+0, + 0.1818605932937785996e+0, + 0.2222214519839380009e+0, + 0.2857142932794299317e+0, + 0.3999999999635251990e+0, + 0.6666666666667333541e+0); + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), vcast_vd_vi(e)); +#else + vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), e); +#endif + + s = ddadd_vd2_vd2_vd2(s, ddscale_vd2_vd2_vd(x, vcast_vd_d(2))); + s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vmul_vd_vd_vd(x2, vd2getx_vd_vd2(x)), t)); + + vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(s), vd2gety_vd_vd2(s)); + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + r = vsel_vd_vo_vd_vd(vispinf_vo_vd(d), vcast_vd_d(SLEEF_INFINITY), r); + r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(0)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), r); + r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-SLEEF_INFINITY), r); +#else + r = vfixup_vd_vd_vd_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0); +#endif + + return r; +} +#endif // #if !defined(DETERMINISTIC) + +static INLINE CONST VECTOR_CC vdouble expk(vdouble2 d) { + vdouble u = 
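+  /* expk: double-double exp kernel backing pow below.  Reduction is
+     q = rint(d / ln 2) with ln 2 split as L2U + L2L, followed by a
+     degree-10 polynomial on the normalized remainder and a 2^q scale
+     via vldexp2; inputs below -1000 flush to 0.  pow itself is then
+     essentially expk(ddmul(logk(|x|), y)) plus sign and integer-y
+     bookkeeping. */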
vmul_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)), vcast_vd_d(R_LN2)); + vdouble dq = vrint_vd_vd(u); + vint q = vrint_vi_vd(dq); + vdouble2 s, t; + + s = ddadd2_vd2_vd2_vd(d, vmul_vd_vd_vd(dq, vcast_vd_d(-L2U))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dq, vcast_vd_d(-L2L))); + + s = ddnormalize_vd2_vd2(s); + + vdouble s2 = vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(s)), s4 = vmul_vd_vd_vd(s2, s2), s8 = vmul_vd_vd_vd(s4, s4); + u = POLY10(vd2getx_vd_vd2(s), s2, s4, s8, + 2.51069683420950419527139e-08, + 2.76286166770270649116855e-07, + 2.75572496725023574143864e-06, + 2.48014973989819794114153e-05, + 0.000198412698809069797676111, + 0.0013888888939977128960529, + 0.00833333333332371417601081, + 0.0416666666665409524128449, + 0.166666666666666740681535, + 0.500000000000000999200722); + + t = ddadd_vd2_vd_vd2(vcast_vd_d(1), s); + t = ddadd_vd2_vd2_vd2(t, ddmul_vd2_vd2_vd(ddsqu_vd2_vd2(s), u)); + + u = vadd_vd_vd_vd(vd2getx_vd_vd2(t), vd2gety_vd_vd2(t)); + u = vldexp2_vd_vd_vi(u, q); + + u = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(vd2getx_vd_vd2(d), vcast_vd_d(-1000)), vreinterpret_vm_vd(u))); + + return u; +} + +#if !defined(DETERMINISTIC) +EXPORT CONST VECTOR_CC vdouble xpow(vdouble x, vdouble y) { +#if 1 + vopmask yisint = visint_vo_vd(y); + vopmask yisodd = vand_vo_vo_vo(visodd_vo_vd(y), yisint); + + vdouble2 d = ddmul_vd2_vd2_vd(logk(vabs_vd_vd(x)), y); + vdouble result = expk(d); + result = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vd2getx_vd_vd2(d), vcast_vd_d(709.78271114955742909217217426)), vcast_vd_d(SLEEF_INFINITY), result); + + result = vmul_vd_vd_vd(result, + vsel_vd_vo_vd_vd(vgt_vo_vd_vd(x, vcast_vd_d(0)), + vcast_vd_d(1), + vsel_vd_vo_vd_vd(yisint, vsel_vd_vo_vd_vd(yisodd, vcast_vd_d(-1.0), vcast_vd_d(1)), vcast_vd_d(SLEEF_NAN)))); + + vdouble efx = vmulsign_vd_vd_vd(vsub_vd_vd_vd(vabs_vd_vd(x), vcast_vd_d(1)), y); + + result = vsel_vd_vo_vd_vd(visinf_vo_vd(y), + vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(efx, vcast_vd_d(0.0)), + vreinterpret_vm_vd(vsel_vd_vo_vd_vd(veq_vo_vd_vd(efx, vcast_vd_d(0.0)), + vcast_vd_d(1.0), + vcast_vd_d(SLEEF_INFINITY))))), + result); + + result = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(x), veq_vo_vd_vd(x, vcast_vd_d(0.0))), + vmul_vd_vd_vd(vsel_vd_vo_vd_vd(yisodd, vsign_vd_vd(x), vcast_vd_d(1.0)), + vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0.0)), vneg_vd_vd(y), y), vcast_vd_d(0.0)), + vreinterpret_vm_vd(vcast_vd_d(SLEEF_INFINITY))))), + result); + + result = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vreinterpret_vm_vd(result))); + + result = vsel_vd_vo_vd_vd(vor_vo_vo_vo(veq_vo_vd_vd(y, vcast_vd_d(0)), veq_vo_vd_vd(x, vcast_vd_d(1))), vcast_vd_d(1), result); + + return result; +#else + return expk(ddmul_vd2_vd2_vd(logk(x), y)); +#endif +} +#endif // #if !defined(DETERMINISTIC) + +static INLINE CONST VECTOR_CC vdouble2 expk2(vdouble2 d) { + vdouble u = vmul_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)), vcast_vd_d(R_LN2)); + vdouble dq = vrint_vd_vd(u); + vint q = vrint_vi_vd(dq); + vdouble2 s, t; + + s = ddadd2_vd2_vd2_vd(d, vmul_vd_vd_vd(dq, vcast_vd_d(-L2U))); + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dq, vcast_vd_d(-L2L))); + + vdouble2 s2 = ddsqu_vd2_vd2(s), s4 = ddsqu_vd2_vd2(s2); + vdouble s8 = vmul_vd_vd_vd(vd2getx_vd_vd2(s4), vd2getx_vd_vd2(s4)); + u = POLY10(vd2getx_vd_vd2(s), vd2getx_vd_vd2(s2), vd2getx_vd_vd2(s4), s8, + +0.1602472219709932072e-9, + +0.2092255183563157007e-8, + 
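+  /* expk2 is expk returning its result as an unevaluated
+     double-double, so that the hyperbolics below can form E - 1/E and
+     E + 1/E and divide before any rounding to a single double takes
+     place. */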
+0.2505230023782644465e-7, + +0.2755724800902135303e-6, + +0.2755731892386044373e-5, + +0.2480158735605815065e-4, + +0.1984126984148071858e-3, + +0.1388888888886763255e-2, + +0.8333333333333347095e-2, + +0.4166666666666669905e-1); + + t = ddadd_vd2_vd_vd2(vcast_vd_d(0.5), ddmul_vd2_vd2_vd(s, vcast_vd_d(+0.1666666666666666574e+0))); + t = ddadd_vd2_vd_vd2(vcast_vd_d(1.0), ddmul_vd2_vd2_vd2(t, s)); + t = ddadd_vd2_vd_vd2(vcast_vd_d(1.0), ddmul_vd2_vd2_vd2(t, s)); + t = ddadd_vd2_vd2_vd2(t, ddmul_vd2_vd2_vd(s4, u)); + + t = vd2setx_vd2_vd2_vd(t, vldexp2_vd_vd_vi(vd2getx_vd_vd2(t), q)); + t = vd2sety_vd2_vd2_vd(t, vldexp2_vd_vd_vi(vd2gety_vd_vd2(t), q)); + + t = vd2setx_vd2_vd2_vd(t, vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(vd2getx_vd_vd2(d), vcast_vd_d(-1000)), vreinterpret_vm_vd(vd2getx_vd_vd2(t))))); + t = vd2sety_vd2_vd2_vd(t, vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(vd2getx_vd_vd2(d), vcast_vd_d(-1000)), vreinterpret_vm_vd(vd2gety_vd_vd2(t))))); + + return t; +} + +#if !defined(DETERMINISTIC) +EXPORT CONST VECTOR_CC vdouble xsinh(vdouble x) { + vdouble y = vabs_vd_vd(x); + vdouble2 d = expk2(vcast_vd2_vd_vd(y, vcast_vd_d(0))); + d = ddsub_vd2_vd2_vd2(d, ddrec_vd2_vd2(d)); + y = vmul_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)), vcast_vd_d(0.5)); + + y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(710)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); + y = vmulsign_vd_vd_vd(y, x); + y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); + + return y; +} + +EXPORT CONST VECTOR_CC vdouble xcosh(vdouble x) { + vdouble y = vabs_vd_vd(x); + vdouble2 d = expk2(vcast_vd2_vd_vd(y, vcast_vd_d(0))); + d = ddadd_vd2_vd2_vd2(d, ddrec_vd2_vd2(d)); + y = vmul_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)), vcast_vd_d(0.5)); + + y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(710)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); + y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); + + return y; +} + +EXPORT CONST VECTOR_CC vdouble xtanh(vdouble x) { + vdouble y = vabs_vd_vd(x); + vdouble2 d = expk2(vcast_vd2_vd_vd(y, vcast_vd_d(0))); + vdouble2 e = ddrec_vd2_vd2(d); + d = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd2_vd2(d, ddneg_vd2_vd2(e)), ddadd2_vd2_vd2_vd2(d, e)); + y = vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)); + + y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(18.714973875)), visnan_vo_vd(y)), vcast_vd_d(1.0), y); + y = vmulsign_vd_vd_vd(y, x); + y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); + + return y; +} + +EXPORT CONST VECTOR_CC vdouble xsinh_u35(vdouble x) { + vdouble e = expm1k(vabs_vd_vd(x)); + + vdouble y = vdiv_vd_vd_vd(vadd_vd_vd_vd(e, vcast_vd_d(2)), vadd_vd_vd_vd(e, vcast_vd_d(1))); + y = vmul_vd_vd_vd(y, vmul_vd_vd_vd(vcast_vd_d(0.5), e)); + + y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(709)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); + y = vmulsign_vd_vd_vd(y, x); + y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); + + return y; +} + +EXPORT CONST VECTOR_CC vdouble xcosh_u35(vdouble x) { + vdouble e = xexp(vabs_vd_vd(x)); + vdouble y = vmla_vd_vd_vd_vd(vcast_vd_d(0.5), e, vdiv_vd_vd_vd(vcast_vd_d(0.5), e)); + + y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(709)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); + y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), 
vreinterpret_vm_vd(y))); + + return y; +} + +EXPORT CONST VECTOR_CC vdouble xtanh_u35(vdouble x) { + vdouble d = expm1k(vmul_vd_vd_vd(vcast_vd_d(2), vabs_vd_vd(x))); + vdouble y = vdiv_vd_vd_vd(d, vadd_vd_vd_vd(vcast_vd_d(2), d)); + + y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(18.714973875)), visnan_vo_vd(y)), vcast_vd_d(1.0), y); + y = vmulsign_vd_vd_vd(y, x); + y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); + + return y; +} + +static INLINE CONST VECTOR_CC vdouble2 logk2(vdouble2 d) { + vdouble2 x, x2, m, s; + vdouble t; + vint e; + + e = vilogbk_vi_vd(vmul_vd_vd_vd(vd2getx_vd_vd2(d), vcast_vd_d(1.0/0.75))); + + m = vd2setxy_vd2_vd_vd(vldexp2_vd_vd_vi(vd2getx_vd_vd2(d), vneg_vi_vi(e)), + vldexp2_vd_vd_vi(vd2gety_vd_vd2(d), vneg_vi_vi(e))); + + x = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(m, vcast_vd_d(-1)), ddadd2_vd2_vd2_vd(m, vcast_vd_d(1))); + x2 = ddsqu_vd2_vd2(x); + + vdouble x4 = vmul_vd_vd_vd(vd2getx_vd_vd2(x2), vd2getx_vd_vd2(x2)), x8 = vmul_vd_vd_vd(x4, x4); + t = POLY7(vd2getx_vd_vd2(x2), x4, x8, + 0.13860436390467167910856, + 0.131699838841615374240845, + 0.153914168346271945653214, + 0.181816523941564611721589, + 0.22222224632662035403996, + 0.285714285511134091777308, + 0.400000000000914013309483); + t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(x2), vcast_vd_d(0.666666666666664853302393)); + + s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), vcast_vd_vi(e)); + s = ddadd_vd2_vd2_vd2(s, ddscale_vd2_vd2_vd(x, vcast_vd_d(2))); + s = ddadd_vd2_vd2_vd2(s, ddmul_vd2_vd2_vd(ddmul_vd2_vd2_vd2(x2, x), t)); + + return s; +} + +EXPORT CONST VECTOR_CC vdouble xasinh(vdouble x) { + vdouble y = vabs_vd_vd(x); + vopmask o = vgt_vo_vd_vd(y, vcast_vd_d(1)); + vdouble2 d; + + d = vsel_vd2_vo_vd2_vd2(o, ddrec_vd2_vd(x), vcast_vd2_vd_vd(y, vcast_vd_d(0))); + d = ddsqrt_vd2_vd2(ddadd2_vd2_vd2_vd(ddsqu_vd2_vd2(d), vcast_vd_d(1))); + d = vsel_vd2_vo_vd2_vd2(o, ddmul_vd2_vd2_vd(d, y), d); + + d = logk2(ddnormalize_vd2_vd2(ddadd2_vd2_vd2_vd(d, x))); + y = vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)); + + y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(SQRT_DBL_MAX)), + visnan_vo_vd(y)), + vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), x), y); + + y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); + y = vsel_vd_vo_vd_vd(visnegzero_vo_vd(x), vcast_vd_d(-0.0), y); + + return y; +} + +EXPORT CONST VECTOR_CC vdouble xacosh(vdouble x) { + vdouble2 d = logk2(ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd2(ddsqrt_vd2_vd2(ddadd2_vd2_vd_vd(x, vcast_vd_d(1))), ddsqrt_vd2_vd2(ddadd2_vd2_vd_vd(x, vcast_vd_d(-1)))), x)); + vdouble y = vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)); + + y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(SQRT_DBL_MAX)), + visnan_vo_vd(y)), + vcast_vd_d(SLEEF_INFINITY), y); + y = vreinterpret_vd_vm(vandnot_vm_vo64_vm(veq_vo_vd_vd(x, vcast_vd_d(1.0)), vreinterpret_vm_vd(y))); + + y = vreinterpret_vd_vm(vor_vm_vo64_vm(vlt_vo_vd_vd(x, vcast_vd_d(1.0)), vreinterpret_vm_vd(y))); + y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); + + return y; +} + +EXPORT CONST VECTOR_CC vdouble xatanh(vdouble x) { + vdouble y = vabs_vd_vd(x); + vdouble2 d = logk2(dddiv_vd2_vd2_vd2(ddadd2_vd2_vd_vd(vcast_vd_d(1), y), ddadd2_vd2_vd_vd(vcast_vd_d(1), vneg_vd_vd(y)))); + y = vreinterpret_vd_vm(vor_vm_vo64_vm(vgt_vo_vd_vd(y, vcast_vd_d(1.0)), vreinterpret_vm_vd(vsel_vd_vo_vd_vd(veq_vo_vd_vd(y, 
vcast_vd_d(1.0)), vcast_vd_d(SLEEF_INFINITY), vmul_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)), vcast_vd_d(0.5)))))); + + y = vmulsign_vd_vd_vd(y, x); + y = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(x), visnan_vo_vd(y)), vreinterpret_vm_vd(y))); + y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); + + return y; +} + +EXPORT CONST VECTOR_CC vdouble xcbrt(vdouble d) { + vdouble x, y, q = vcast_vd_d(1.0); + vint e, qu, re; + vdouble t; + +#if defined(ENABLE_AVX512F) || defined(ENABLE_AVX512FNOFMA) + vdouble s = d; +#endif + e = vadd_vi_vi_vi(vilogbk_vi_vd(vabs_vd_vd(d)), vcast_vi_i(1)); + d = vldexp2_vd_vd_vi(d, vneg_vi_vi(e)); + + t = vadd_vd_vd_vd(vcast_vd_vi(e), vcast_vd_d(6144)); + qu = vtruncate_vi_vd(vmul_vd_vd_vd(t, vcast_vd_d(1.0/3.0))); + re = vtruncate_vi_vd(vsub_vd_vd_vd(t, vmul_vd_vd_vd(vcast_vd_vi(qu), vcast_vd_d(3)))); + + q = vsel_vd_vo_vd_vd(vcast_vo64_vo32(veq_vo_vi_vi(re, vcast_vi_i(1))), vcast_vd_d(1.2599210498948731647672106), q); + q = vsel_vd_vo_vd_vd(vcast_vo64_vo32(veq_vo_vi_vi(re, vcast_vi_i(2))), vcast_vd_d(1.5874010519681994747517056), q); + q = vldexp2_vd_vd_vi(q, vsub_vi_vi_vi(qu, vcast_vi_i(2048))); + + q = vmulsign_vd_vd_vd(q, d); + + d = vabs_vd_vd(d); + + x = vcast_vd_d(-0.640245898480692909870982); + x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(2.96155103020039511818595)); + x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(-5.73353060922947843636166)); + x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(6.03990368989458747961407)); + x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(-3.85841935510444988821632)); + x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(2.2307275302496609725722)); + + y = vmul_vd_vd_vd(x, x); y = vmul_vd_vd_vd(y, y); x = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vmlapn_vd_vd_vd_vd(d, y, x), vcast_vd_d(1.0 / 3.0))); + y = vmul_vd_vd_vd(vmul_vd_vd_vd(d, x), x); + y = vmul_vd_vd_vd(vsub_vd_vd_vd(y, vmul_vd_vd_vd(vmul_vd_vd_vd(vcast_vd_d(2.0 / 3.0), y), vmla_vd_vd_vd_vd(y, x, vcast_vd_d(-1.0)))), q); + +#if defined(ENABLE_AVX512F) || defined(ENABLE_AVX512FNOFMA) + y = vsel_vd_vo_vd_vd(visinf_vo_vd(s), vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), s), y); + y = vsel_vd_vo_vd_vd(veq_vo_vd_vd(s, vcast_vd_d(0)), vmulsign_vd_vd_vd(vcast_vd_d(0), s), y); +#endif + + return y; +} + +EXPORT CONST VECTOR_CC vdouble xcbrt_u1(vdouble d) { + vdouble x, y, z, t; + vdouble2 q2 = vcast_vd2_d_d(1, 0), u, v; + vint e, qu, re; + +#if defined(ENABLE_AVX512F) || defined(ENABLE_AVX512FNOFMA) + vdouble s = d; +#endif + e = vadd_vi_vi_vi(vilogbk_vi_vd(vabs_vd_vd(d)), vcast_vi_i(1)); + d = vldexp2_vd_vd_vi(d, vneg_vi_vi(e)); + + t = vadd_vd_vd_vd(vcast_vd_vi(e), vcast_vd_d(6144)); + qu = vtruncate_vi_vd(vmul_vd_vd_vd(t, vcast_vd_d(1.0/3.0))); + re = vtruncate_vi_vd(vsub_vd_vd_vd(t, vmul_vd_vd_vd(vcast_vd_vi(qu), vcast_vd_d(3)))); + + q2 = vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(veq_vo_vi_vi(re, vcast_vi_i(1))), vcast_vd2_d_d(1.2599210498948731907, -2.5899333753005069177e-17), q2); + q2 = vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(veq_vo_vi_vi(re, vcast_vi_i(2))), vcast_vd2_d_d(1.5874010519681995834, -1.0869008194197822986e-16), q2); + + q2 = vd2setxy_vd2_vd_vd(vmulsign_vd_vd_vd(vd2getx_vd_vd2(q2), d), vmulsign_vd_vd_vd(vd2gety_vd_vd2(q2), d)); + d = vabs_vd_vd(d); + + x = vcast_vd_d(-0.640245898480692909870982); + x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(2.96155103020039511818595)); + x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(-5.73353060922947843636166)); + x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(6.03990368989458747961407)); + x = vmla_vd_vd_vd_vd(x, d, 
vcast_vd_d(-3.85841935510444988821632)); + x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(2.2307275302496609725722)); + + y = vmul_vd_vd_vd(x, x); y = vmul_vd_vd_vd(y, y); x = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vmlapn_vd_vd_vd_vd(d, y, x), vcast_vd_d(1.0 / 3.0))); + + z = x; + + u = ddmul_vd2_vd_vd(x, x); + u = ddmul_vd2_vd2_vd2(u, u); + u = ddmul_vd2_vd2_vd(u, d); + u = ddadd2_vd2_vd2_vd(u, vneg_vd_vd(x)); + y = vadd_vd_vd_vd(vd2getx_vd_vd2(u), vd2gety_vd_vd2(u)); + + y = vmul_vd_vd_vd(vmul_vd_vd_vd(vcast_vd_d(-2.0 / 3.0), y), z); + v = ddadd2_vd2_vd2_vd(ddmul_vd2_vd_vd(z, z), y); + v = ddmul_vd2_vd2_vd(v, d); + v = ddmul_vd2_vd2_vd2(v, q2); + z = vldexp2_vd_vd_vi(vadd_vd_vd_vd(vd2getx_vd_vd2(v), vd2gety_vd_vd2(v)), vsub_vi_vi_vi(qu, vcast_vi_i(2048))); + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + z = vsel_vd_vo_vd_vd(visinf_vo_vd(d), vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), vd2getx_vd_vd2(q2)), z); + z = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vreinterpret_vd_vm(vsignbit_vm_vd(vd2getx_vd_vd2(q2))), z); +#else + z = vsel_vd_vo_vd_vd(visinf_vo_vd(s), vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), s), z); + z = vsel_vd_vo_vd_vd(veq_vo_vd_vd(s, vcast_vd_d(0)), vmulsign_vd_vd_vd(vcast_vd_d(0), s), z); +#endif + + return z; +} +#endif // #if !defined(DETERMINISTIC) + +EXPORT CONST VECTOR_CC vdouble xexp2(vdouble d) { + vdouble u = vrint_vd_vd(d), s; + vint q = vrint_vi_vd(u); + + s = vsub_vd_vd_vd(d, u); + + vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2), s8 = vmul_vd_vd_vd(s4, s4); + u = POLY10(s, s2, s4, s8, + +0.4434359082926529454e-9, + +0.7073164598085707425e-8, + +0.1017819260921760451e-6, + +0.1321543872511327615e-5, + +0.1525273353517584730e-4, + +0.1540353045101147808e-3, + +0.1333355814670499073e-2, + +0.9618129107597600536e-2, + +0.5550410866482046596e-1, + +0.2402265069591012214e+0); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.6931471805599452862e+0)); + +#ifdef ENABLE_FMA_DP + u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(1)); +#else + u = vd2getx_vd_vd2(ddnormalize_vd2_vd2(ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd_vd(u, s)))); +#endif + + u = vldexp2_vd_vd_vi(u, q); + + u = vsel_vd_vo_vd_vd(vge_vo_vd_vd(d, vcast_vd_d(1024)), vcast_vd_d(SLEEF_INFINITY), u); + u = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(d, vcast_vd_d(-2000)), vreinterpret_vm_vd(u))); + + return u; +} + +EXPORT CONST VECTOR_CC vdouble xexp2_u35(vdouble d) { + vdouble u = vrint_vd_vd(d), s; + vint q = vrint_vi_vd(u); + + s = vsub_vd_vd_vd(d, u); + + vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2), s8 = vmul_vd_vd_vd(s4, s4); + u = POLY10(s, s2, s4, s8, + +0.4434359082926529454e-9, + +0.7073164598085707425e-8, + +0.1017819260921760451e-6, + +0.1321543872511327615e-5, + +0.1525273353517584730e-4, + +0.1540353045101147808e-3, + +0.1333355814670499073e-2, + +0.9618129107597600536e-2, + +0.5550410866482046596e-1, + +0.2402265069591012214e+0); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.6931471805599452862e+0)); + + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(1)); + + u = vldexp2_vd_vd_vi(u, q); + + u = vsel_vd_vo_vd_vd(vge_vo_vd_vd(d, vcast_vd_d(1024)), vcast_vd_d(SLEEF_INFINITY), u); + u = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(d, vcast_vd_d(-2000)), vreinterpret_vm_vd(u))); + + return u; +} + +EXPORT CONST VECTOR_CC vdouble xexp10(vdouble d) { + vdouble u = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(LOG10_2))), s; + vint q = vrint_vi_vd(u); + + s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L10U), d); + s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L10L), s); + + u = 
vcast_vd_d(+0.2411463498334267652e-3); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1157488415217187375e-2)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.5013975546789733659e-2)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1959762320720533080e-1)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.6808936399446784138e-1)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2069958494722676234e+0)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.5393829292058536229e+0)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1171255148908541655e+1)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2034678592293432953e+1)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2650949055239205876e+1)); + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2302585092994045901e+1)); + +#ifdef ENABLE_FMA_DP + u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(1)); +#else + u = vd2getx_vd_vd2(ddnormalize_vd2_vd2(ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd_vd(u, s)))); +#endif + + u = vldexp2_vd_vd_vi(u, q); + + u = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(d, vcast_vd_d(308.25471555991671)), vcast_vd_d(SLEEF_INFINITY), u); + u = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(d, vcast_vd_d(-350)), vreinterpret_vm_vd(u))); + + return u; +} + +EXPORT CONST VECTOR_CC vdouble xexp10_u35(vdouble d) { + vdouble u = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(LOG10_2))), s; + vint q = vrint_vi_vd(u); + + s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L10U), d); + s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L10L), s); + + vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2), s8 = vmul_vd_vd_vd(s4, s4); + u = POLY11(s, s2, s4, s8, + +0.2411463498334267652e-3, + +0.1157488415217187375e-2, + +0.5013975546789733659e-2, + +0.1959762320720533080e-1, + +0.6808936399446784138e-1, + +0.2069958494722676234e+0, + +0.5393829292058536229e+0, + +0.1171255148908541655e+1, + +0.2034678592293432953e+1, + +0.2650949055239205876e+1, + +0.2302585092994045901e+1); + + u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(1)); + + u = vldexp2_vd_vd_vi(u, q); + + u = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(d, vcast_vd_d(308.25471555991671)), vcast_vd_d(SLEEF_INFINITY), u); + u = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(d, vcast_vd_d(-350)), vreinterpret_vm_vd(u))); + + return u; +} + +#if !defined(DETERMINISTIC) +EXPORT CONST VECTOR_CC vdouble xexpm1(vdouble a) { + vdouble2 d = ddadd2_vd2_vd2_vd(expk2(vcast_vd2_vd_vd(a, vcast_vd_d(0))), vcast_vd_d(-1.0)); + vdouble x = vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)); + x = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(a, vcast_vd_d(709.782712893383996732223)), vcast_vd_d(SLEEF_INFINITY), x); + x = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(a, vcast_vd_d(-36.736800569677101399113302437)), vcast_vd_d(-1), x); + x = vsel_vd_vo_vd_vd(visnegzero_vo_vd(a), vcast_vd_d(-0.0), x); + return x; +} + +EXPORT CONST VECTOR_CC vdouble xlog10(vdouble d) { + vdouble2 x; + vdouble t, m, x2; + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN)); + d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d((double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32))), d); + vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); + m = vldexp3_vd_vd_vi(d, vneg_vi_vi(e)); + e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e); +#else + vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); + e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e); + m = vgetmant_vd_vd(d); +#endif + + x = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd_vd(vcast_vd_d(-1), m), ddadd2_vd2_vd_vd(vcast_vd_d(1), m)); + x2 = vmul_vd_vd_vd(vd2getx_vd_vd2(x), 
vd2getx_vd_vd2(x)); + + vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4); + t = POLY7(x2, x4, x8, + +0.6653725819576758460e-1, + +0.6625722782820833712e-1, + +0.7898105214313944078e-1, + +0.9650955035715275132e-1, + +0.1240841409721444993e+0, + +0.1737177927454605086e+0, + +0.2895296546021972617e+0); + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.30102999566398119802, -2.803728127785170339e-18), vcast_vd_vi(e)); +#else + vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.30102999566398119802, -2.803728127785170339e-18), e); +#endif + + s = ddadd_vd2_vd2_vd2(s, ddmul_vd2_vd2_vd2(x, vcast_vd2_d_d(0.86858896380650363334, 1.1430059694096389311e-17))); + s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vmul_vd_vd_vd(x2, vd2getx_vd_vd2(x)), t)); + + vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(s), vd2gety_vd_vd2(s)); + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + r = vsel_vd_vo_vd_vd(vispinf_vo_vd(d), vcast_vd_d(SLEEF_INFINITY), r); + r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(0)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), r); + r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-SLEEF_INFINITY), r); +#else + r = vfixup_vd_vd_vd_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0); +#endif + + return r; +} + +EXPORT CONST VECTOR_CC vdouble xlog2(vdouble d) { + vdouble2 x; + vdouble t, m, x2; + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN)); + d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d((double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32))), d); + vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); + m = vldexp3_vd_vd_vi(d, vneg_vi_vi(e)); + e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e); +#else + vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); + e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e); + m = vgetmant_vd_vd(d); +#endif + + x = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd_vd(vcast_vd_d(-1), m), ddadd2_vd2_vd_vd(vcast_vd_d(1), m)); + x2 = vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(x)); + + vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4); + t = POLY7(x2, x4, x8, + +0.2211941750456081490e+0, + +0.2200768693152277689e+0, + +0.2623708057488514656e+0, + +0.3205977477944495502e+0, + +0.4121985945485324709e+0, + +0.5770780162997058982e+0, + +0.96179669392608091449); + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + vdouble2 s = ddadd2_vd2_vd_vd2(vcast_vd_vi(e), + ddmul_vd2_vd2_vd2(x, vcast_vd2_d_d(2.885390081777926774, 6.0561604995516736434e-18))); +#else + vdouble2 s = ddadd2_vd2_vd_vd2(e, + ddmul_vd2_vd2_vd2(x, vcast_vd2_d_d(2.885390081777926774, 6.0561604995516736434e-18))); +#endif + + s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(vmul_vd_vd_vd(x2, vd2getx_vd_vd2(x)), t)); + + vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(s), vd2gety_vd_vd2(s)); + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + r = vsel_vd_vo_vd_vd(vispinf_vo_vd(d), vcast_vd_d(SLEEF_INFINITY), r); + r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(0)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), r); + r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-SLEEF_INFINITY), r); +#else + r = vfixup_vd_vd_vd_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0); +#endif + + return r; +} + +EXPORT CONST VECTOR_CC vdouble xlog2_u35(vdouble d) { + vdouble m, t, 
x, x2; + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN)); + d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d((double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32))), d); + vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); + m = vldexp3_vd_vd_vi(d, vneg_vi_vi(e)); + e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e); +#else + vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); + e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e); + m = vgetmant_vd_vd(d); +#endif + + x = vdiv_vd_vd_vd(vsub_vd_vd_vd(m, vcast_vd_d(1)), vadd_vd_vd_vd(m, vcast_vd_d(1))); + x2 = vmul_vd_vd_vd(x, x); + + t = vcast_vd_d(+0.2211941750456081490e+0); + t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.2200768693152277689e+0)); + t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.2623708057488514656e+0)); + t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.3205977477944495502e+0)); + t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.4121985945485324709e+0)); + t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.5770780162997058982e+0)); + t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.96179669392608091449 )); + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + vdouble2 s = ddadd_vd2_vd_vd2(vcast_vd_vi(e), + ddmul_vd2_vd_vd(x, vcast_vd_d(2.885390081777926774))); +#else + vdouble2 s = ddadd_vd2_vd_vd2(e, + ddmul_vd2_vd_vd(x, vcast_vd_d(2.885390081777926774))); +#endif + + vdouble r = vmla_vd_vd_vd_vd(t, vmul_vd_vd_vd(x, x2), vadd_vd_vd_vd(vd2getx_vd_vd2(s), vd2gety_vd_vd2(s))); + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + r = vsel_vd_vo_vd_vd(vispinf_vo_vd(d), vcast_vd_d(SLEEF_INFINITY), r); + r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(0)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), r); + r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-SLEEF_INFINITY), r); +#else + r = vfixup_vd_vd_vd_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0); +#endif + + return r; +} + +EXPORT CONST VECTOR_CC vdouble xlog1p(vdouble d) { + vdouble2 x; + vdouble t, m, x2; + + vdouble dp1 = vadd_vd_vd_vd(d, vcast_vd_d(1)); + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + vopmask o = vlt_vo_vd_vd(dp1, vcast_vd_d(DBL_MIN)); + dp1 = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(dp1, vcast_vd_d((double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32))), dp1); + vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(dp1, vcast_vd_d(1.0/0.75))); + t = vldexp3_vd_vd_vi(vcast_vd_d(1), vneg_vi_vi(e)); + m = vmla_vd_vd_vd_vd(d, t, vsub_vd_vd_vd(t, vcast_vd_d(1))); + e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e); + vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), vcast_vd_vi(e)); +#else + vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(dp1, vcast_vd_d(1.0/0.75))); + e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e); + t = vldexp3_vd_vd_vi(vcast_vd_d(1), vneg_vi_vi(vrint_vi_vd(e))); + m = vmla_vd_vd_vd_vd(d, t, vsub_vd_vd_vd(t, vcast_vd_d(1))); + vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), e); +#endif + + x = dddiv_vd2_vd2_vd2(vcast_vd2_vd_vd(m, vcast_vd_d(0)), ddadd_vd2_vd_vd(vcast_vd_d(2), m)); + x2 = vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(x)); + + vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4); + t = POLY7(x2, x4, x8, + 0.1532076988502701353e+0, + 0.1525629051003428716e+0, + 0.1818605932937785996e+0, 
+ 0.2222214519839380009e+0, + 0.2857142932794299317e+0, + 0.3999999999635251990e+0, + 0.6666666666667333541e+0); + + s = ddadd_vd2_vd2_vd2(s, ddscale_vd2_vd2_vd(x, vcast_vd_d(2))); + s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vmul_vd_vd_vd(x2, vd2getx_vd_vd2(x)), t)); + + vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(s), vd2gety_vd_vd2(s)); + + r = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(d, vcast_vd_d(1e+307)), vcast_vd_d(SLEEF_INFINITY), r); + r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(-1)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), r); + r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(-1)), vcast_vd_d(-SLEEF_INFINITY), r); + r = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), r); + + return r; +} + +// + +static INLINE CONST VECTOR_CC vint2 vcast_vi2_i_i(int i0, int i1) { return vcast_vi2_vm(vcast_vm_i_i(i0, i1)); } + +EXPORT CONST VECTOR_CC vdouble xfabs(vdouble x) { return vabs_vd_vd(x); } + +EXPORT CONST VECTOR_CC vdouble xcopysign(vdouble x, vdouble y) { return vcopysign_vd_vd_vd(x, y); } + +EXPORT CONST VECTOR_CC vdouble xfmax(vdouble x, vdouble y) { +#if (defined(__x86_64__) || defined(__i386__)) && !defined(ENABLE_VECEXT) && !defined(ENABLE_PUREC) + return vsel_vd_vo_vd_vd(visnan_vo_vd(y), x, vmax_vd_vd_vd(x, y)); +#else + return vsel_vd_vo_vd_vd(visnan_vo_vd(y), x, vsel_vd_vo_vd_vd(vgt_vo_vd_vd(x, y), x, y)); +#endif +} + +EXPORT CONST VECTOR_CC vdouble xfmin(vdouble x, vdouble y) { +#if (defined(__x86_64__) || defined(__i386__)) && !defined(ENABLE_VECEXT) && !defined(ENABLE_PUREC) + return vsel_vd_vo_vd_vd(visnan_vo_vd(y), x, vmin_vd_vd_vd(x, y)); +#else + return vsel_vd_vo_vd_vd(visnan_vo_vd(y), x, vsel_vd_vo_vd_vd(vgt_vo_vd_vd(y, x), x, y)); +#endif +} + +EXPORT CONST VECTOR_CC vdouble xfdim(vdouble x, vdouble y) { + vdouble ret = vsub_vd_vd_vd(x, y); + ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(ret, vcast_vd_d(0)), veq_vo_vd_vd(x, y)), vcast_vd_d(0), ret); + return ret; +} + +EXPORT CONST VECTOR_CC vdouble xtrunc(vdouble x) { +#ifdef FULL_FP_ROUNDING + return vtruncate_vd_vd(x); +#else + vdouble fr = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vcast_vd_d(INT64_C(1) << 31), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (INT64_C(1) << 31))))))); + fr = vsub_vd_vd_vd(fr, vcast_vd_vi(vtruncate_vi_vd(fr))); + return vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(x), vge_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(INT64_C(1) << 52))), x, vcopysign_vd_vd_vd(vsub_vd_vd_vd(x, fr), x)); +#endif +} + +EXPORT CONST VECTOR_CC vdouble xfloor(vdouble x) { + vdouble fr = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vcast_vd_d(INT64_C(1) << 31), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (INT64_C(1) << 31))))))); + fr = vsub_vd_vd_vd(fr, vcast_vd_vi(vtruncate_vi_vd(fr))); + fr = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(fr, vcast_vd_d(0)), vadd_vd_vd_vd(fr, vcast_vd_d(1.0)), fr); + return vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(x), vge_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(INT64_C(1) << 52))), x, vcopysign_vd_vd_vd(vsub_vd_vd_vd(x, fr), x)); +} + +EXPORT CONST VECTOR_CC vdouble xceil(vdouble x) { + vdouble fr = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vcast_vd_d(INT64_C(1) << 31), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (INT64_C(1) << 31))))))); + fr = vsub_vd_vd_vd(fr, vcast_vd_vi(vtruncate_vi_vd(fr))); + fr = vsel_vd_vo_vd_vd(vle_vo_vd_vd(fr, vcast_vd_d(0)), fr, vsub_vd_vd_vd(fr, vcast_vd_d(1.0))); + return vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(x), vge_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(INT64_C(1) << 52))), x, vcopysign_vd_vd_vd(vsub_vd_vd_vd(x, fr), x)); +} + +EXPORT CONST 
VECTOR_CC vdouble xround(vdouble d) { + vdouble x = vadd_vd_vd_vd(d, vcast_vd_d(0.5)); + vdouble fr = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vcast_vd_d(INT64_C(1) << 31), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (INT64_C(1) << 31))))))); + fr = vsub_vd_vd_vd(fr, vcast_vd_vi(vtruncate_vi_vd(fr))); + x = vsel_vd_vo_vd_vd(vand_vo_vo_vo(vle_vo_vd_vd(x, vcast_vd_d(0)), veq_vo_vd_vd(fr, vcast_vd_d(0))), vsub_vd_vd_vd(x, vcast_vd_d(1.0)), x); + fr = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(fr, vcast_vd_d(0)), vadd_vd_vd_vd(fr, vcast_vd_d(1.0)), fr); + x = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0.49999999999999994449)), vcast_vd_d(0), x); + return vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(d), vge_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(INT64_C(1) << 52))), d, vcopysign_vd_vd_vd(vsub_vd_vd_vd(x, fr), d)); +} + +EXPORT CONST VECTOR_CC vdouble xrint(vdouble d) { +#ifdef FULL_FP_ROUNDING + return vrint_vd_vd(d); +#else + vdouble c = vmulsign_vd_vd_vd(vcast_vd_d(INT64_C(1) << 52), d); + return vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(INT64_C(1) << 52)), + d, vorsign_vd_vd_vd(vsub_vd_vd_vd(vadd_vd_vd_vd(d, c), c), d)); +#endif +} + +EXPORT CONST VECTOR_CC vdouble xnextafter(vdouble x, vdouble y) { + x = vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0)), vmulsign_vd_vd_vd(vcast_vd_d(0), y), x); + vint2 t, xi2 = vreinterpret_vi2_vd(x); + vopmask c = vxor_vo_vo_vo(vsignbit_vo_vd(x), vge_vo_vd_vd(y, x)); + + t = vadd_vi2_vi2_vi2(vxor_vi2_vi2_vi2(xi2, vcast_vi2_i_i(0x7fffffff, 0xffffffff)), vcast_vi2_i_i(0, 1)); + t = vadd_vi2_vi2_vi2(t, vrev21_vi2_vi2(vand_vi2_vi2_vi2(vcast_vi2_i_i(0, 1), veq_vi2_vi2_vi2(t, vcast_vi2_i_i(-1, 0))))); + xi2 = vreinterpret_vi2_vd(vsel_vd_vo_vd_vd(c, vreinterpret_vd_vi2(t), vreinterpret_vd_vi2(xi2))); + + xi2 = vsub_vi2_vi2_vi2(xi2, vcast_vi2_vm(vand_vm_vo64_vm(vneq_vo_vd_vd(x, y), vcast_vm_i_i(0, 1)))); + + xi2 = vreinterpret_vi2_vd(vsel_vd_vo_vd_vd(vneq_vo_vd_vd(x, y), + vreinterpret_vd_vi2(vadd_vi2_vi2_vi2(xi2, vrev21_vi2_vi2(vand_vi2_vi2_vi2(vcast_vi2_i_i(0, -1), veq_vi2_vi2_vi2(xi2, vcast_vi2_i_i(0, -1)))))), + vreinterpret_vd_vi2(xi2))); + + t = vadd_vi2_vi2_vi2(vxor_vi2_vi2_vi2(xi2, vcast_vi2_i_i(0x7fffffff, 0xffffffff)), vcast_vi2_i_i(0, 1)); + t = vadd_vi2_vi2_vi2(t, vrev21_vi2_vi2(vand_vi2_vi2_vi2(vcast_vi2_i_i(0, 1), veq_vi2_vi2_vi2(t, vcast_vi2_i_i(-1, 0))))); + xi2 = vreinterpret_vi2_vd(vsel_vd_vo_vd_vd(c, vreinterpret_vd_vi2(t), vreinterpret_vd_vi2(xi2))); + + vdouble ret = vreinterpret_vd_vi2(xi2); + + ret = vsel_vd_vo_vd_vd(vand_vo_vo_vo(veq_vo_vd_vd(ret, vcast_vd_d(0)), vneq_vo_vd_vd(x, vcast_vd_d(0))), + vmulsign_vd_vd_vd(vcast_vd_d(0), x), ret); + + ret = vsel_vd_vo_vd_vd(vand_vo_vo_vo(veq_vo_vd_vd(x, vcast_vd_d(0)), veq_vo_vd_vd(y, vcast_vd_d(0))), y, ret); + + ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vcast_vd_d(SLEEF_NAN), ret); + + return ret; +} + +EXPORT CONST VECTOR_CC vdouble xfrfrexp(vdouble x) { + x = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(DBL_MIN)), vmul_vd_vd_vd(x, vcast_vd_d(UINT64_C(1) << 63)), x); + + vmask xm = vreinterpret_vm_vd(x); + xm = vand_vm_vm_vm(xm, vcast_vm_i_i(~0x7ff00000, ~0)); + xm = vor_vm_vm_vm (xm, vcast_vm_i_i( 0x3fe00000, 0)); + + vdouble ret = vreinterpret_vd_vm(xm); + + ret = vsel_vd_vo_vd_vd(visinf_vo_vd(x), vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), x), ret); + ret = vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0)), x, ret); + + return ret; +} + +EXPORT CONST VECTOR_CC vint xexpfrexp(vdouble x) { + x = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(vabs_vd_vd(x), 
vcast_vd_d(DBL_MIN)), vmul_vd_vd_vd(x, vcast_vd_d(UINT64_C(1) << 63)), x); + + vint ret = vcastu_vi_vi2(vreinterpret_vi2_vd(x)); + ret = vsub_vi_vi_vi(vand_vi_vi_vi(vsrl_vi_vi_i(ret, 20), vcast_vi_i(0x7ff)), vcast_vi_i(0x3fe)); + + ret = vsel_vi_vo_vi_vi(vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vd_vd(x, vcast_vd_d(0)), visnan_vo_vd(x)), visinf_vo_vd(x)), vcast_vi_i(0), ret); + + return ret; +} + +EXPORT CONST VECTOR_CC vdouble xfma(vdouble x, vdouble y, vdouble z) { +#ifdef ENABLE_FMA_DP + return vfma_vd_vd_vd_vd(x, y, z); +#else + vdouble h2 = vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z), q = vcast_vd_d(1); + vopmask o = vlt_vo_vd_vd(vabs_vd_vd(h2), vcast_vd_d(1e-300)); + { + const double c0 = UINT64_C(1) << 54, c1 = c0 * c0, c2 = c1 * c1; + x = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(x, vcast_vd_d(c1)), x); + y = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(y, vcast_vd_d(c1)), y); + z = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(z, vcast_vd_d(c2)), z); + q = vsel_vd_vo_vd_vd(o, vcast_vd_d(1.0 / c2), q); + } + o = vgt_vo_vd_vd(vabs_vd_vd(h2), vcast_vd_d(1e+300)); + { + const double c0 = UINT64_C(1) << 54, c1 = c0 * c0, c2 = c1 * c1; + x = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(x, vcast_vd_d(1.0 / c1)), x); + y = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(y, vcast_vd_d(1.0 / c1)), y); + z = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(z, vcast_vd_d(1.0 / c2)), z); + q = vsel_vd_vo_vd_vd(o, vcast_vd_d(c2), q); + } + vdouble2 d = ddmul_vd2_vd_vd(x, y); + d = ddadd2_vd2_vd2_vd(d, z); + vdouble ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(veq_vo_vd_vd(x, vcast_vd_d(0)), veq_vo_vd_vd(y, vcast_vd_d(0))), z, vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d))); + o = visinf_vo_vd(z); + o = vandnot_vo_vo_vo(visinf_vo_vd(x), o); + o = vandnot_vo_vo_vo(visnan_vo_vd(x), o); + o = vandnot_vo_vo_vo(visinf_vo_vd(y), o); + o = vandnot_vo_vo_vo(visnan_vo_vd(y), o); + h2 = vsel_vd_vo_vd_vd(o, z, h2); + + o = vor_vo_vo_vo(visinf_vo_vd(h2), visnan_vo_vd(h2)); + + return vsel_vd_vo_vd_vd(o, h2, vmul_vd_vd_vd(ret, q)); +#endif +} + +SQRTU05_FUNCATR VECTOR_CC vdouble xsqrt_u05(vdouble d) { +#if defined(ENABLE_FMA_DP) + vdouble q, w, x, y, z; + + d = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(SLEEF_NAN), d); + + vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(8.636168555094445E-78)); + d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d(1.157920892373162E77)), d); + q = vsel_vd_vo_vd_vd(o, vcast_vd_d(2.9387358770557188E-39), vcast_vd_d(1)); + + y = vreinterpret_vd_vi2(vsub_vi2_vi2_vi2(vcast_vi2_i_i(0x5fe6ec85, 0xe7de30da), vsrl_vi2_vi2_i(vreinterpret_vi2_vd(d), 1))); + + x = vmul_vd_vd_vd(d, y); w = vmul_vd_vd_vd(vcast_vd_d(0.5), y); + y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(0.5)); + x = vfma_vd_vd_vd_vd(x, y, x); w = vfma_vd_vd_vd_vd(w, y, w); + y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(0.5)); + x = vfma_vd_vd_vd_vd(x, y, x); w = vfma_vd_vd_vd_vd(w, y, w); + y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(0.5)); + x = vfma_vd_vd_vd_vd(x, y, x); w = vfma_vd_vd_vd_vd(w, y, w); + + y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(1.5)); w = vadd_vd_vd_vd(w, w); + w = vmul_vd_vd_vd(w, y); + x = vmul_vd_vd_vd(w, d); + y = vfmapn_vd_vd_vd_vd(w, d, x); z = vfmanp_vd_vd_vd_vd(w, x, vcast_vd_d(1)); + + z = vfmanp_vd_vd_vd_vd(w, y, z); w = vmul_vd_vd_vd(vcast_vd_d(0.5), x); + w = vfma_vd_vd_vd_vd(w, z, y); + w = vadd_vd_vd_vd(w, x); + + w = vmul_vd_vd_vd(w, q); + + w = vsel_vd_vo_vd_vd(vor_vo_vo_vo(veq_vo_vd_vd(d, vcast_vd_d(0)), + veq_vo_vd_vd(d, vcast_vd_d(SLEEF_INFINITY))), d, w); + + w = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(SLEEF_NAN), w); + + return w; +#else + vdouble q; + 
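+  // Descriptive note added for this port (not in the upstream SLEEF
+  // sources): this non-FMA branch seeds 1/sqrt(d) with the bit-level
+  // estimate 0x5fe6ec86'00000000 - (bits(d) >> 1), the double-precision
+  // "fast inverse square root" trick, refines it with three
+  // Newton-Raphson steps x = x * (1.5 - 0.5*d*x*x), multiplies by d to
+  // obtain an approximate square root, and finishes with one correction
+  // in double-double arithmetic, sqrt(d) = 0.5 * (d/x + x), rescaled by q.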
vopmask o; + + d = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(SLEEF_NAN), d); + + o = vlt_vo_vd_vd(d, vcast_vd_d(8.636168555094445E-78)); + d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d(1.157920892373162E77)), d); + q = vsel_vd_vo_vd_vd(o, vcast_vd_d(2.9387358770557188E-39*0.5), vcast_vd_d(0.5)); + + o = vgt_vo_vd_vd(d, vcast_vd_d(1.3407807929942597e+154)); + d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d(7.4583407312002070e-155)), d); + q = vsel_vd_vo_vd_vd(o, vcast_vd_d(1.1579208923731620e+77*0.5), q); + + vdouble x = vreinterpret_vd_vi2(vsub_vi2_vi2_vi2(vcast_vi2_i_i(0x5fe6ec86, 0), vsrl_vi2_vi2_i(vreinterpret_vi2_vd(vadd_vd_vd_vd(d, vcast_vd_d(1e-320))), 1))); + + x = vmul_vd_vd_vd(x, vsub_vd_vd_vd(vcast_vd_d(1.5), vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vcast_vd_d(0.5), d), x), x))); + x = vmul_vd_vd_vd(x, vsub_vd_vd_vd(vcast_vd_d(1.5), vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vcast_vd_d(0.5), d), x), x))); + x = vmul_vd_vd_vd(x, vsub_vd_vd_vd(vcast_vd_d(1.5), vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vcast_vd_d(0.5), d), x), x))); + x = vmul_vd_vd_vd(x, d); + + vdouble2 d2 = ddmul_vd2_vd2_vd2(ddadd2_vd2_vd_vd2(d, ddmul_vd2_vd_vd(x, x)), ddrec_vd2_vd(x)); + + x = vmul_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(d2), vd2gety_vd_vd2(d2)), q); + + x = vsel_vd_vo_vd_vd(vispinf_vo_vd(d), vcast_vd_d(SLEEF_INFINITY), x); + x = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), d, x); + + return x; +#endif +} + +EXPORT CONST VECTOR_CC vdouble xsqrt(vdouble d) { +#if defined(ACCURATE_SQRT) + return vsqrt_vd_vd(d); +#else + // fall back to approximation if ACCURATE_SQRT is undefined + return xsqrt_u05(d); +#endif +} + +EXPORT CONST VECTOR_CC vdouble xsqrt_u35(vdouble d) { return xsqrt_u05(d); } + +EXPORT CONST VECTOR_CC vdouble xhypot_u05(vdouble x, vdouble y) { + x = vabs_vd_vd(x); + y = vabs_vd_vd(y); + vdouble min = vmin_vd_vd_vd(x, y), n = min; + vdouble max = vmax_vd_vd_vd(x, y), d = max; + + vopmask o = vlt_vo_vd_vd(max, vcast_vd_d(DBL_MIN)); + n = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(n, vcast_vd_d(UINT64_C(1) << 54)), n); + d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d(UINT64_C(1) << 54)), d); + + vdouble2 t = dddiv_vd2_vd2_vd2(vcast_vd2_vd_vd(n, vcast_vd_d(0)), vcast_vd2_vd_vd(d, vcast_vd_d(0))); + t = ddmul_vd2_vd2_vd(ddsqrt_vd2_vd2(ddadd2_vd2_vd2_vd(ddsqu_vd2_vd2(t), vcast_vd_d(1))), max); + vdouble ret = vadd_vd_vd_vd(vd2getx_vd_vd2(t), vd2gety_vd_vd2(t)); + ret = vsel_vd_vo_vd_vd(visnan_vo_vd(ret), vcast_vd_d(SLEEF_INFINITY), ret); + ret = vsel_vd_vo_vd_vd(veq_vo_vd_vd(min, vcast_vd_d(0)), max, ret); + ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vcast_vd_d(SLEEF_NAN), ret); + ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(veq_vo_vd_vd(x, vcast_vd_d(SLEEF_INFINITY)), veq_vo_vd_vd(y, vcast_vd_d(SLEEF_INFINITY))), vcast_vd_d(SLEEF_INFINITY), ret); + + return ret; +} + +EXPORT CONST VECTOR_CC vdouble xhypot_u35(vdouble x, vdouble y) { + x = vabs_vd_vd(x); + y = vabs_vd_vd(y); + vdouble min = vmin_vd_vd_vd(x, y); + vdouble max = vmax_vd_vd_vd(x, y); + + vdouble t = vdiv_vd_vd_vd(min, max); + vdouble ret = vmul_vd_vd_vd(max, vsqrt_vd_vd(vmla_vd_vd_vd_vd(t, t, vcast_vd_d(1)))); + ret = vsel_vd_vo_vd_vd(veq_vo_vd_vd(min, vcast_vd_d(0)), max, ret); + ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vcast_vd_d(SLEEF_NAN), ret); + ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(veq_vo_vd_vd(x, vcast_vd_d(SLEEF_INFINITY)), veq_vo_vd_vd(y, vcast_vd_d(SLEEF_INFINITY))), vcast_vd_d(SLEEF_INFINITY), ret); + + return ret; +} + +static INLINE CONST 
VECTOR_CC vdouble vtoward0(vdouble x) { // returns nextafter(x, 0) + vdouble t = vreinterpret_vd_vm(vadd64_vm_vm_vm(vreinterpret_vm_vd(x), vcast_vm_i_i(-1, -1))); + return vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0)), vcast_vd_d(0), t); +} + +static INLINE CONST VECTOR_CC vdouble vptrunc(vdouble x) { // round to integer toward 0, positive argument only +#ifdef FULL_FP_ROUNDING + return vtruncate_vd_vd(x); +#else + vdouble fr = vmla_vd_vd_vd_vd(vcast_vd_d(-(double)(INT64_C(1) << 31)), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (INT64_C(1) << 31))))), x); + fr = vsub_vd_vd_vd(fr, vcast_vd_vi(vtruncate_vi_vd(fr))); + return vsel_vd_vo_vd_vd(vge_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(INT64_C(1) << 52)), x, vsub_vd_vd_vd(x, fr)); +#endif +} + +/* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ +EXPORT CONST VECTOR_CC vdouble xfmod(vdouble x, vdouble y) { + vdouble n = vabs_vd_vd(x), d = vabs_vd_vd(y), s = vcast_vd_d(1), q; + vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN)); + n = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(n, vcast_vd_d(UINT64_C(1) << 54)), n); + d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d(UINT64_C(1) << 54)), d); + s = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(s , vcast_vd_d(1.0 / (UINT64_C(1) << 54))), s); + vdouble2 r = vcast_vd2_vd_vd(n, vcast_vd_d(0)); + vdouble rd = vtoward0(vrec_vd_vd(d)); + + for(int i=0;i<21;i++) { // ceil(log2(DBL_MAX) / 52) + q = vptrunc(vmul_vd_vd_vd(vtoward0(vd2getx_vd_vd2(r)), rd)); +#ifndef ENABLE_FMA_DP + q = vreinterpret_vd_vm(vand_vm_vm_vm(vreinterpret_vm_vd(q), vcast_vm_i_i(0xffffffff, 0xfffffffe))); +#endif + q = vsel_vd_vo_vd_vd(vand_vo_vo_vo(vgt_vo_vd_vd(vmul_vd_vd_vd(vcast_vd_d(3), d), vd2getx_vd_vd2(r)), + vge_vo_vd_vd(vd2getx_vd_vd2(r), d)), + vcast_vd_d(2), q); + q = vsel_vd_vo_vd_vd(vand_vo_vo_vo(vgt_vo_vd_vd(vadd_vd_vd_vd(d, d), vd2getx_vd_vd2(r)), + vge_vo_vd_vd(vd2getx_vd_vd2(r), d)), + vcast_vd_d(1), q); + r = ddnormalize_vd2_vd2(ddadd2_vd2_vd2_vd2(r, ddmul_vd2_vd_vd(q, vneg_vd_vd(d)))); + if (vtestallones_i_vo64(vlt_vo_vd_vd(vd2getx_vd_vd2(r), d))) break; + } + + vdouble ret = vmul_vd_vd_vd(vd2getx_vd_vd2(r), s); + ret = vsel_vd_vo_vd_vd(veq_vo_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(r), vd2gety_vd_vd2(r)), d), vcast_vd_d(0), ret); + + ret = vmulsign_vd_vd_vd(ret, x); + + ret = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(n, d), x, ret); + ret = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(SLEEF_NAN), ret); + + return ret; +} + +static INLINE VECTOR_CC vdouble vrintk2_vd_vd(vdouble d) { +#ifdef FULL_FP_ROUNDING + return vrint_vd_vd(d); +#else + vdouble c = vmulsign_vd_vd_vd(vcast_vd_d(INT64_C(1) << 52), d); + return vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(INT64_C(1) << 52)), + d, vorsign_vd_vd_vd(vsub_vd_vd_vd(vadd_vd_vd_vd(d, c), c), d)); +#endif +} + +EXPORT CONST VECTOR_CC vdouble xremainder(vdouble x, vdouble y) { + vdouble n = vabs_vd_vd(x), d = vabs_vd_vd(y), s = vcast_vd_d(1), q; + vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN*2)); + n = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(n, vcast_vd_d(UINT64_C(1) << 54)), n); + d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d(UINT64_C(1) << 54)), d); + s = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(s , vcast_vd_d(1.0 / (UINT64_C(1) << 54))), s); + vdouble rd = vrec_vd_vd(d); + vdouble2 r = vcast_vd2_vd_vd(n, vcast_vd_d(0)); + vopmask qisodd = vneq_vo_vd_vd(vcast_vd_d(0), vcast_vd_d(0)); + + for(int i=0;i<21;i++) { // ceil(log2(DBL_MAX) / 52) + q = vrintk2_vd_vd(vmul_vd_vd_vd(vd2getx_vd_vd2(r), rd)); +#ifndef ENABLE_FMA_DP + q = 
vreinterpret_vd_vm(vand_vm_vm_vm(vreinterpret_vm_vd(q), vcast_vm_i_i(0xffffffff, 0xfffffffe))); +#endif + q = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(vabs_vd_vd(vd2getx_vd_vd2(r)), vmul_vd_vd_vd(d, vcast_vd_d(1.5))), vmulsign_vd_vd_vd(vcast_vd_d(1.0), vd2getx_vd_vd2(r)), q); + q = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(vabs_vd_vd(vd2getx_vd_vd2(r)), vmul_vd_vd_vd(d, vcast_vd_d(0.5))), + vandnot_vo_vo_vo(qisodd, veq_vo_vd_vd(vabs_vd_vd(vd2getx_vd_vd2(r)), vmul_vd_vd_vd(d, vcast_vd_d(0.5))))), + vcast_vd_d(0.0), q); + if (vtestallones_i_vo64(veq_vo_vd_vd(q, vcast_vd_d(0)))) break; + q = vsel_vd_vo_vd_vd(visinf_vo_vd(vmul_vd_vd_vd(q, vneg_vd_vd(d))), vadd_vd_vd_vd(q, vmulsign_vd_vd_vd(vcast_vd_d(-1), vd2getx_vd_vd2(r))), q); + qisodd = vxor_vo_vo_vo(qisodd, visodd_vo_vd(q)); + r = ddnormalize_vd2_vd2(ddadd2_vd2_vd2_vd2(r, ddmul_vd2_vd_vd(q, vneg_vd_vd(d)))); + } + + vdouble ret = vmul_vd_vd_vd(vd2getx_vd_vd2(r), s); + ret = vmulsign_vd_vd_vd(ret, x); + ret = vsel_vd_vo_vd_vd(visinf_vo_vd(y), vsel_vd_vo_vd_vd(visinf_vo_vd(x), vcast_vd_d(SLEEF_NAN), x), ret); + ret = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(SLEEF_NAN), ret); + return ret; +} + +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) + typedef struct { + vdouble2 a, b; + } dd2; + +static dd2 dd2setab_dd2_vd2_vd2(vdouble2 a, vdouble2 b) { + dd2 r = { a, b }; + return r; +} +static vdouble2 dd2geta_vd2_dd2(dd2 d) { return d.a; } +static vdouble2 dd2getb_vd2_dd2(dd2 d) { return d.b; } +#endif + +/* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ +static CONST dd2 gammak(vdouble a) { + vdouble2 clc = vcast_vd2_d_d(0, 0), clln = vcast_vd2_d_d(1, 0), clld = vcast_vd2_d_d(1, 0); + vdouble2 v = vcast_vd2_d_d(1, 0), x, y, z; + vdouble t, u; + + vopmask otiny = vlt_vo_vd_vd(vabs_vd_vd(a), vcast_vd_d(1e-306)), oref = vlt_vo_vd_vd(a, vcast_vd_d(0.5)); + + x = vsel_vd2_vo_vd2_vd2(otiny, vcast_vd2_d_d(0, 0), + vsel_vd2_vo_vd2_vd2(oref, ddadd2_vd2_vd_vd(vcast_vd_d(1), vneg_vd_vd(a)), + vcast_vd2_vd_vd(a, vcast_vd_d(0)))); + + vopmask o0 = vand_vo_vo_vo(vle_vo_vd_vd(vcast_vd_d(0.5), vd2getx_vd_vd2(x)), vle_vo_vd_vd(vd2getx_vd_vd2(x), vcast_vd_d(1.1))); + vopmask o2 = vle_vo_vd_vd(vcast_vd_d(2.3), vd2getx_vd_vd2(x)); + + y = ddnormalize_vd2_vd2(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(x, vcast_vd_d(1)), x)); + y = ddnormalize_vd2_vd2(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(x, vcast_vd_d(2)), y)); + y = ddnormalize_vd2_vd2(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(x, vcast_vd_d(3)), y)); + y = ddnormalize_vd2_vd2(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(x, vcast_vd_d(4)), y)); + + vopmask o = vand_vo_vo_vo(o2, vle_vo_vd_vd(vd2getx_vd_vd2(x), vcast_vd_d(7))); + clln = vsel_vd2_vo_vd2_vd2(o, y, clln); + + x = vsel_vd2_vo_vd2_vd2(o, ddadd2_vd2_vd2_vd(x, vcast_vd_d(5)), x); + + t = vsel_vd_vo_vd_vd(o2, vrec_vd_vd(vd2getx_vd_vd2(x)), vd2getx_vd_vd2(ddnormalize_vd2_vd2(ddadd2_vd2_vd2_vd(x, vsel_vd_vo_d_d(o0, -1, -2))))); + + u = vsel_vd_vo_vo_d_d_d(o2, o0, -156.801412704022726379848862, +0.2947916772827614196e+2, +0.7074816000864609279e-7); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +1.120804464289911606838558160000, +0.1281459691827820109e+3, +0.4009244333008730443e-6)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +13.39798545514258921833306020000, +0.2617544025784515043e+3, +0.1040114641628246946e-5)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.116546276599463200848033357000, +0.3287022855685790432e+3, +0.1508349150733329167e-5)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, 
-1.391801093265337481495562410000, +0.2818145867730348186e+3, +0.1288143074933901020e-5)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.015056113040026424412918973400, +0.1728670414673559605e+3, +0.4744167749884993937e-6)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.179540117061234856098844714000, +0.7748735764030416817e+2, -0.6554816306542489902e-7)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.002481743600264997730942489280, +0.2512856643080930752e+2, -0.3189252471452599844e-6)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.029527880945699120504851034100, +0.5766792106140076868e+1, +0.1358883821470355377e-6)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.000540164767892604515196325186, +0.7270275473996180571e+0, -0.4343931277157336040e-6)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.006403362833808069794787256200, +0.8396709124579147809e-1, +0.9724785897406779555e-6)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.000162516262783915816896611252, -0.8211558669746804595e-1, -0.2036886057225966011e-5)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.001914438498565477526465972390, +0.6828831828341884458e-1, +0.4373363141819725815e-5)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +7.20489541602001055898311517e-05, -0.7712481339961671511e-1, -0.9439951268304008677e-5)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.000839498720672087279971000786, +0.8337492023017314957e-1, +0.2050727030376389804e-4)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -5.17179090826059219329394422e-05, -0.9094964931456242518e-1, -0.4492620183431184018e-4)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.000592166437353693882857342347, +0.1000996313575929358e+0, +0.9945751236071875931e-4)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +6.97281375836585777403743539e-05, -0.1113342861544207724e+0, -0.2231547599034983196e-3)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.000784039221720066627493314301, +0.1255096673213020875e+0, +0.5096695247101967622e-3)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.000229472093621399176949318732, -0.1440498967843054368e+0, -0.1192753911667886971e-2)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.002681327160493827160473958490, +0.1695571770041949811e+0, +0.2890510330742210310e-2)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.003472222222222222222175164840, -0.2073855510284092762e+0, -0.7385551028674461858e-2)); + u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.083333333333333333335592087900, +0.2705808084277815939e+0, +0.2058080842778455335e-1)); + + y = ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(x, vcast_vd_d(-0.5)), logk2(x)); + y = ddadd2_vd2_vd2_vd2(y, ddneg_vd2_vd2(x)); + y = ddadd2_vd2_vd2_vd2(y, vcast_vd2_d_d(0.91893853320467278056, -3.8782941580672414498e-17)); // 0.5*log(2*M_PI) + + z = ddadd2_vd2_vd2_vd(ddmul_vd2_vd_vd (u, t), vsel_vd_vo_d_d(o0, -0.4006856343865314862e+0, -0.6735230105319810201e-1)); + z = ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd(z, t), vsel_vd_vo_d_d(o0, +0.8224670334241132030e+0, +0.3224670334241132030e+0)); + z = ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd(z, t), vsel_vd_vo_d_d(o0, -0.5772156649015328655e+0, +0.4227843350984671345e+0)); + z = ddmul_vd2_vd2_vd(z, t); + + clc = vsel_vd2_vo_vd2_vd2(o2, y, z); + + clld = vsel_vd2_vo_vd2_vd2(o2, ddadd2_vd2_vd2_vd(ddmul_vd2_vd_vd(u, t), vcast_vd_d(1)), clld); + + y = clln; + + 
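+  // Descriptive note added for this port (not in the upstream SLEEF
+  // sources): the selections below assemble gammak's final result. For
+  // |a| < 1e-306 the log part clc becomes log(2^120) = 83.1776...,
+  // undoing the scaling of tiny arguments (clld is later set to
+  // a * 2^120), and for a < 0.5 the reflection formula
+  // Gamma(a) = pi / (sin(pi a) * Gamma(1 - a)) is applied through
+  // log(M_PI) = 1.1447... and the sinpik(t) factor folded into clld.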
clc = vsel_vd2_vo_vd2_vd2(otiny, vcast_vd2_d_d(83.1776616671934334590333, 3.67103459631568507221878e-15), // log(2^120) + vsel_vd2_vo_vd2_vd2(oref, ddadd2_vd2_vd2_vd2(vcast_vd2_d_d(1.1447298858494001639, 1.026595116270782638e-17), ddneg_vd2_vd2(clc)), clc)); // log(M_PI) + clln = vsel_vd2_vo_vd2_vd2(otiny, vcast_vd2_d_d(1, 0), vsel_vd2_vo_vd2_vd2(oref, clln, clld)); + + if (!vtestallones_i_vo64(vnot_vo64_vo64(oref))) { + t = vsub_vd_vd_vd(a, vmul_vd_vd_vd(vcast_vd_d(INT64_C(1) << 28), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(a, vcast_vd_d(1.0 / (INT64_C(1) << 28))))))); + x = ddmul_vd2_vd2_vd2(clld, sinpik(t)); + } + + clld = vsel_vd2_vo_vd2_vd2(otiny, vcast_vd2_vd_vd(vmul_vd_vd_vd(a, vcast_vd_d((INT64_C(1) << 60)*(double)(INT64_C(1) << 60))), vcast_vd_d(0)), + vsel_vd2_vo_vd2_vd2(oref, x, y)); + + return dd2setab_dd2_vd2_vd2(clc, dddiv_vd2_vd2_vd2(clln, clld)); +} + +EXPORT CONST VECTOR_CC vdouble xtgamma_u1(vdouble a) { + dd2 d = gammak(a); + vdouble2 y = ddmul_vd2_vd2_vd2(expk2(dd2geta_vd2_dd2(d)), dd2getb_vd2_dd2(d)); + vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(y), vd2gety_vd_vd2(y)); + vopmask o; + + o = vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vd_vd(a, vcast_vd_d(-SLEEF_INFINITY)), + vand_vo_vo_vo(vlt_vo_vd_vd(a, vcast_vd_d(0)), visint_vo_vd(a))), + vand_vo_vo_vo(vand_vo_vo_vo(visnumber_vo_vd(a), vlt_vo_vd_vd(a, vcast_vd_d(0))), visnan_vo_vd(r))); + r = vsel_vd_vo_vd_vd(o, vcast_vd_d(SLEEF_NAN), r); + + o = vand_vo_vo_vo(vand_vo_vo_vo(vor_vo_vo_vo(veq_vo_vd_vd(a, vcast_vd_d(SLEEF_INFINITY)), visnumber_vo_vd(a)), + vge_vo_vd_vd(a, vcast_vd_d(-DBL_MIN))), + vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vd_vd(a, vcast_vd_d(0)), vgt_vo_vd_vd(a, vcast_vd_d(200))), visnan_vo_vd(r))); + r = vsel_vd_vo_vd_vd(o, vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), a), r); + + return r; +} + +EXPORT CONST VECTOR_CC vdouble xlgamma_u1(vdouble a) { + dd2 d = gammak(a); + vdouble2 y = ddadd2_vd2_vd2_vd2(dd2geta_vd2_dd2(d), logk2(ddabs_vd2_vd2(dd2getb_vd2_dd2(d)))); + vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(y), vd2gety_vd_vd2(y)); + vopmask o; + + o = vor_vo_vo_vo(visinf_vo_vd(a), + vor_vo_vo_vo(vand_vo_vo_vo(vle_vo_vd_vd(a, vcast_vd_d(0)), visint_vo_vd(a)), + vand_vo_vo_vo(visnumber_vo_vd(a), visnan_vo_vd(r)))); + r = vsel_vd_vo_vd_vd(o, vcast_vd_d(SLEEF_INFINITY), r); + + return r; +} + +/* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ +EXPORT CONST VECTOR_CC vdouble xerf_u1(vdouble a) { + vdouble s = a, t, u; + vdouble2 d; + + a = vabs_vd_vd(a); + vopmask o0 = vlt_vo_vd_vd(a, vcast_vd_d(1.0)); + vopmask o1 = vlt_vo_vd_vd(a, vcast_vd_d(3.7)); + vopmask o2 = vlt_vo_vd_vd(a, vcast_vd_d(6.0)); + u = vsel_vd_vo_vd_vd(o0, vmul_vd_vd_vd(a, a), a); + + t = vsel_vd_vo_vo_d_d_d(o0, o1, +0.6801072401395392157e-20, +0.2830954522087717660e-13, -0.5846750404269610493e-17); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.2161766247570056391e-18, -0.1509491946179481940e-11, +0.6076691048812607898e-15)); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.4695919173301598752e-17, +0.3827857177807173152e-10, -0.3007518609604893831e-13)); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.9049140419888010819e-16, -0.6139733921558987241e-09, +0.9427906260824646063e-12)); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.1634018903557411517e-14, +0.6985387934608038824e-08, -0.2100110908269393629e-10)); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.2783485786333455216e-13, -0.5988224513034371474e-07, +0.3534639523461223473e-09)); + t = vmla_vd_vd_vd_vd(t, u, 
vsel_vd_vo_vo_d_d_d(o0, o1, +0.4463221276786412722e-12, +0.4005716952355346640e-06, -0.4664967728285395926e-08)); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.6711366622850138987e-11, -0.2132190104575784400e-05, +0.4943823283769000532e-07)); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.9422759050232658346e-10, +0.9092461304042630325e-05, -0.4271203394761148254e-06)); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.1229055530100228477e-08, -0.3079188080966205457e-04, +0.3034067677404915895e-05)); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.1480719281585085023e-07, +0.7971413443082370762e-04, -0.1776295289066871135e-04)); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.1636584469123402714e-06, -0.1387853215225442864e-03, +0.8524547630559505050e-04)); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.1646211436588923363e-05, +0.6469678026257590965e-04, -0.3290582944961784398e-03)); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.1492565035840624866e-04, +0.4996645280372945860e-03, +0.9696966068789101157e-03)); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.1205533298178966496e-03, -0.1622802482842520535e-02, -0.1812527628046986137e-02)); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.8548327023450851166e-03, +0.1615320557049377171e-03, -0.4725409828123619017e-03)); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.5223977625442188799e-02, +0.1915262325574875607e-01, +0.2090315427924229266e-01)); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.2686617064513125569e-01, -0.1027818298486033455e+00, -0.1052041921842776645e+00)); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.1128379167095512753e+00, -0.6366172819842503827e+00, -0.6345351808766568347e+00)); + t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.3761263890318375380e+00, -0.1128379590648910469e+01, -0.1129442929103524396e+01)); + d = ddmul_vd2_vd_vd(t, u); + + d = ddadd2_vd2_vd2_vd2(d, vcast_vd2_vd_vd(vsel_vd_vo_vo_d_d_d(o0, o1, 1.1283791670955125586, 3.4110644736196137587e-08, 0.00024963035690526438285), + vsel_vd_vo_vo_d_d_d(o0, o1, 1.5335459613165822674e-17, -2.4875650708323294246e-24, -5.4362665034856259795e-21))); + d = vsel_vd2_vo_vd2_vd2(o0, ddmul_vd2_vd2_vd(d, a), ddadd_vd2_vd_vd2(vcast_vd_d(1.0), ddneg_vd2_vd2(expk2(d)))); + + u = vmulsign_vd_vd_vd(vsel_vd_vo_vd_vd(o2, vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)), vcast_vd_d(1)), s); + u = vsel_vd_vo_vd_vd(visnan_vo_vd(a), vcast_vd_d(SLEEF_NAN), u); + + return u; +} + +/* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ +EXPORT CONST VECTOR_CC vdouble xerfc_u15(vdouble a) { + vdouble s = a, r = vcast_vd_d(0), t; + vdouble2 u, d, x; + a = vabs_vd_vd(a); + vopmask o0 = vlt_vo_vd_vd(a, vcast_vd_d(1.0)); + vopmask o1 = vlt_vo_vd_vd(a, vcast_vd_d(2.2)); + vopmask o2 = vlt_vo_vd_vd(a, vcast_vd_d(4.2)); + vopmask o3 = vlt_vo_vd_vd(a, vcast_vd_d(27.3)); + + u = vsel_vd2_vo_vd2_vd2(o0, ddmul_vd2_vd_vd(a, a), vsel_vd2_vo_vd2_vd2(o1, vcast_vd2_vd_vd(a, vcast_vd_d(0)), dddiv_vd2_vd2_vd2(vcast_vd2_d_d(1, 0), vcast_vd2_vd_vd(a, vcast_vd_d(0))))); + + t = vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.6801072401395386139e-20, +0.3438010341362585303e-12, -0.5757819536420710449e+2, +0.2334249729638701319e+5); + t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.2161766247570055669e-18, -0.1237021188160598264e-10, +0.4669289654498104483e+3, -0.4695661044933107769e+5)); + t = 
vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.4695919173301595670e-17, +0.2117985839877627852e-09, -0.1796329879461355858e+4, +0.3173403108748643353e+5)); + t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.9049140419888007122e-16, -0.2290560929177369506e-08, +0.4355892193699575728e+4, +0.3242982786959573787e+4)); + t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.1634018903557410728e-14, +0.1748931621698149538e-07, -0.7456258884965764992e+4, -0.2014717999760347811e+5)); + t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.2783485786333451745e-13, -0.9956602606623249195e-07, +0.9553977358167021521e+4, +0.1554006970967118286e+5)); + t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.4463221276786415752e-12, +0.4330010240640327080e-06, -0.9470019905444229153e+4, -0.6150874190563554293e+4)); + t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.6711366622850136563e-11, -0.1435050600991763331e-05, +0.7387344321849855078e+4, +0.1240047765634815732e+4)); + t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.9422759050232662223e-10, +0.3460139479650695662e-05, -0.4557713054166382790e+4, -0.8210325475752699731e+2)); + t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.1229055530100229098e-08, -0.4988908180632898173e-05, +0.2207866967354055305e+4, +0.3242443880839930870e+2)); + t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.1480719281585086512e-07, -0.1308775976326352012e-05, -0.8217975658621754746e+3, -0.2923418863833160586e+2)); + t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.1636584469123399803e-06, +0.2825086540850310103e-04, +0.2268659483507917400e+3, +0.3457461732814383071e+0)); + t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.1646211436588923575e-05, -0.6393913713069986071e-04, -0.4633361260318560682e+2, +0.5489730155952392998e+1)); + t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.1492565035840623511e-04, -0.2566436514695078926e-04, +0.9557380123733945965e+1, +0.1559934132251294134e-2)); + t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.1205533298178967851e-03, +0.5895792375659440364e-03, -0.2958429331939661289e+1, -0.1541741566831520638e+1)); + t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.8548327023450850081e-03, -0.1695715579163588598e-02, +0.1670329508092765480e+0, +0.2823152230558364186e-5)); + t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.5223977625442187932e-02, +0.2089116434918055149e-03, +0.6096615680115419211e+0, +0.6249999184195342838e+0)); + t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.2686617064513125222e-01, +0.1912855949584917753e-01, +0.1059212443193543585e-2, +0.1741749416408701288e-8)); + + d = ddmul_vd2_vd2_vd(u, t); + d = ddadd2_vd2_vd2_vd2(d, vcast_vd2_vd_vd(vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, 0.11283791670955126141, -0.10277263343147646779, -0.50005180473999022439, -0.5000000000258444377), + vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -4.0175691625932118483e-18, -6.2338714083404900225e-18, 2.6362140569041995803e-17, -4.0074044712386992281e-17))); + d = ddmul_vd2_vd2_vd2(d, u); + d = ddadd2_vd2_vd2_vd2(d, 
vcast_vd2_vd_vd(vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.37612638903183753802, -0.63661976742916359662, 1.601106273924963368e-06, 2.3761973137523364792e-13),
+					   vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, 1.3391897206042552387e-17, 7.6321019159085724662e-18, 1.1974001857764476775e-23, -1.1670076950531026582e-29)));
+  d = ddmul_vd2_vd2_vd2(d, u);
+  d = ddadd2_vd2_vd2_vd2(d, vcast_vd2_vd_vd(vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, 1.1283791670955125586, -1.1283791674717296161, -0.57236496645145429341, -0.57236494292470108114),
+					    vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, 1.5335459613165822674e-17, 8.0896847755965377194e-17, 3.0704553245872027258e-17, -2.3984352208056898003e-17)));
+
+  x = ddmul_vd2_vd2_vd(vsel_vd2_vo_vd2_vd2(o1, d, vcast_vd2_vd_vd(vneg_vd_vd(a), vcast_vd_d(0))), a);
+  x = vsel_vd2_vo_vd2_vd2(o1, x, ddadd2_vd2_vd2_vd2(x, d));
+  x = vsel_vd2_vo_vd2_vd2(o0, ddsub_vd2_vd2_vd2(vcast_vd2_d_d(1, 0), x), expk2(x));
+  x = vsel_vd2_vo_vd2_vd2(o1, x, ddmul_vd2_vd2_vd2(x, u));
+
+  r = vsel_vd_vo_vd_vd(o3, vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)), vcast_vd_d(0));
+  r = vsel_vd_vo_vd_vd(vsignbit_vo_vd(s), vsub_vd_vd_vd(vcast_vd_d(2), r), r);
+  r = vsel_vd_vo_vd_vd(visnan_vo_vd(s), vcast_vd_d(SLEEF_NAN), r);
+  return r;
+}
+#endif // #if !defined(DETERMINISTIC)
+
+#if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER)
+// The normal and deterministic versions of the implementations are
+// common for functions like sincospi_u05. Aliases for such functions
+// are defined by the DALIAS_* macros below. The defined aliases
+// (e.g. ysincospi_u05) are then renamed (e.g. to
+// Sleef_cinz_sincospid2_u05sse2) by rename*.h. An illustrative
+// expansion of one alias is sketched below.
+
+#ifdef ENABLE_ALIAS
+#define DALIAS_vd_vd(FUNC) EXPORT CONST VECTOR_CC vdouble y ## FUNC(vdouble) __attribute__((alias( stringify(x ## FUNC) )));
+#define DALIAS_vd2_vd(FUNC) EXPORT CONST VECTOR_CC vdouble2 y ## FUNC(vdouble) __attribute__((alias( stringify(x ## FUNC) )));
+#define DALIAS_vi_vd(FUNC) EXPORT CONST VECTOR_CC vint y ## FUNC(vdouble) __attribute__((alias( stringify(x ## FUNC) )));
+#define DALIAS_vd_vd_vd(FUNC) EXPORT CONST VECTOR_CC vdouble y ## FUNC(vdouble, vdouble) __attribute__((alias( stringify(x ## FUNC) )));
+#define DALIAS_vd_vd_vd_vd(FUNC) EXPORT CONST VECTOR_CC vdouble y ## FUNC(vdouble, vdouble, vdouble) __attribute__((alias( stringify(x ## FUNC) )));
+#else
+#define DALIAS_vd_vd(FUNC) EXPORT CONST VECTOR_CC vdouble y ## FUNC(vdouble d) { return x ## FUNC (d); }
+#define DALIAS_vd2_vd(FUNC) EXPORT CONST VECTOR_CC vdouble2 y ## FUNC(vdouble d) { return x ## FUNC (d); }
+#define DALIAS_vi_vd(FUNC) EXPORT CONST VECTOR_CC vint y ## FUNC(vdouble d) { return x ## FUNC (d); }
+#define DALIAS_vd_vd_vd(FUNC) EXPORT CONST VECTOR_CC vdouble y ## FUNC(vdouble x, vdouble y) { return x ## FUNC (x, y); }
+#define DALIAS_vd_vd_vd_vd(FUNC) EXPORT CONST VECTOR_CC vdouble y ## FUNC(vdouble x, vdouble y, vdouble z) { return x ## FUNC (x, y, z); }
+#endif
+
+/* DALIAS_vd2_vd(sincospi_u05) */
+/* DALIAS_vd2_vd(sincospi_u35) */
+/* DALIAS_vd2_vd(modf) */
+/* DALIAS_vd_vd(log) */
+/* DALIAS_vd_vd(log_u1) */
+/* DALIAS_vd_vd_vd(pow) */
+/* DALIAS_vd_vd(sinh) */
+/* DALIAS_vd_vd(cosh) */
+/* DALIAS_vd_vd(tanh) */
+/* DALIAS_vd_vd(sinh_u35) */
+/* DALIAS_vd_vd(cosh_u35) */
+/* DALIAS_vd_vd(tanh_u35) */
+/* DALIAS_vd_vd(asinh) */
+/* DALIAS_vd_vd(acosh) */
+/* DALIAS_vd_vd(atanh) */
+/* DALIAS_vd_vd(cbrt) */
+/* DALIAS_vd_vd(cbrt_u1) */
+/* DALIAS_vd_vd(expm1) */
+/* DALIAS_vd_vd(log10) */
+/* DALIAS_vd_vd(log2) */
+/* DALIAS_vd_vd(log2_u35) */
+/* DALIAS_vd_vd(log1p) */
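+/*
+  (Illustrative sketch, not part of the original patch: what a single
+  DALIAS line would expand to, using fmin from this list as an
+  arbitrary example.
+
+  With ENABLE_ALIAS defined, DALIAS_vd_vd_vd(fmin) declares a symbol
+  alias:
+
+    EXPORT CONST VECTOR_CC vdouble yfmin(vdouble, vdouble)
+        __attribute__((alias("xfmin")));
+
+  Without ENABLE_ALIAS it falls back to a plain forwarding wrapper:
+
+    EXPORT CONST VECTOR_CC vdouble yfmin(vdouble x, vdouble y) {
+      return xfmin(x, y);
+    }
+
+  rename*.h then maps the y-prefixed name to the public
+  Sleef_cinz_*-style symbol, as described in the comment above.)
+*/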
+/* DALIAS_vd_vd(fabs) */
+/* DALIAS_vd_vd_vd(copysign) */
+/* DALIAS_vd_vd_vd(fmax) */
+/* DALIAS_vd_vd_vd(fmin) */
+/* DALIAS_vd_vd_vd(fdim) */
+/* DALIAS_vd_vd(trunc) */
+/* DALIAS_vd_vd(floor) */
+/* DALIAS_vd_vd(ceil) */
+/* DALIAS_vd_vd(round) */
+/* DALIAS_vd_vd(rint) */
+/* DALIAS_vd_vd_vd(nextafter) */
+/* DALIAS_vd_vd(frfrexp) */
+/* DALIAS_vi_vd(expfrexp) */
+/* DALIAS_vd_vd_vd_vd(fma) */
+/* DALIAS_vd_vd(sqrt_u05) */
+/* DALIAS_vd_vd(sqrt_u35) */
+/* DALIAS_vd_vd_vd(hypot_u05) */
+/* DALIAS_vd_vd_vd(hypot_u35) */
+/* DALIAS_vd_vd_vd(fmod) */
+/* DALIAS_vd_vd_vd(remainder) */
+/* DALIAS_vd_vd(tgamma_u1) */
+/* DALIAS_vd_vd(lgamma_u1) */
+/* DALIAS_vd_vd(erf_u1) */
+/* DALIAS_vd_vd(erfc_u15) */
+#endif // #if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER)
+
+#if !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER)
+EXPORT CONST int xgetInt(int name) {
+  if (1 <= name && name <= 10) return vavailability_i(name);
+  return 0;
+}
+
+EXPORT CONST void *xgetPtr(int name) {
+  if (name == 0) return ISANAME;
+  return (void *)0;
+}
+#endif
+
+#if defined(ALIAS_NO_EXT_SUFFIX) && !defined(DETERMINISTIC)
+#include ALIAS_NO_EXT_SUFFIX
+#endif
+
+#ifdef ENABLE_MAIN
+// gcc -DENABLE_MAIN -Wno-attributes -I../common -I../arch -DENABLE_AVX2 -mavx2 -mfma sleefsimddp.c rempitab.c ../common/common.c -lm
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+int main(int argc, char **argv) {
+  vdouble d1 = vcast_vd_d(atof(argv[1]));
+  vdouble d2 = vcast_vd_d(atof(argv[2]));
+  //vdouble d3 = vcast_vd_d(atof(argv[3]));
+  //vdouble r = xnextafter(d1, d2);
+  //int i;
+  //double fr = frexp(atof(argv[1]), &i);
+  //printf("%.20g\n", xfma(d1, d2, d3)[0]);;
+  //printf("test %.20g\n", xtgamma_u1(d1)[0]);
+  //printf("corr %.20g\n", tgamma(d1[0]));
+  //printf("test %.20g\n", xerf_u1(d1)[0]);
+  //printf("corr %.20g\n", erf(d1[0]));
+  //printf("test %.20g\n", xerfc_u15(d1)[0]);
+  //printf("corr %.20g\n", erfc(d1[0]));
+  //printf("%.20g\n", nextafter(d1[0], d2[0]));;
+  //printf("%.20g\n", vcast_d_vd(xhypot_u05(d1, d2)));
+  //printf("%.20g\n", fr);
+  printf("%.20g\n", fmod(atof(argv[1]), atof(argv[2])));
+  printf("%.20g\n", xfmod(d1, d2)[0]);
+  //vdouble2 r = xsincospi_u35(a);
+  //printf("%g, %g\n", vcast_d_vd(r.x), vcast_d_vd(r.y));
+}
+#endif
+
+#ifdef ENABLE_GNUABI
+/* "finite" aliases for compatibility with GLIBC */
+EXPORT CONST VECTOR_CC vdouble __acos_finite     (vdouble)          __attribute__((weak, alias(str_xacos     )));
+EXPORT CONST VECTOR_CC vdouble __acosh_finite    (vdouble)          __attribute__((weak, alias(str_xacosh    )));
+EXPORT CONST VECTOR_CC vdouble __asin_finite     (vdouble)          __attribute__((weak, alias(str_xasin_u1  )));
+EXPORT CONST VECTOR_CC vdouble __atan2_finite    (vdouble, vdouble) __attribute__((weak, alias(str_xatan2_u1 )));
+EXPORT CONST VECTOR_CC vdouble __atanh_finite    (vdouble)          __attribute__((weak, alias(str_xatanh    )));
+EXPORT CONST VECTOR_CC vdouble __cosh_finite     (vdouble)          __attribute__((weak, alias(str_xcosh     )));
+EXPORT CONST VECTOR_CC vdouble __exp10_finite    (vdouble)          __attribute__((weak, alias(str_xexp10    )));
+EXPORT CONST VECTOR_CC vdouble __exp2_finite     (vdouble)          __attribute__((weak, alias(str_xexp2     )));
+EXPORT CONST VECTOR_CC vdouble __exp_finite      (vdouble)          __attribute__((weak, alias(str_xexp      )));
+EXPORT CONST VECTOR_CC vdouble __fmod_finite     (vdouble, vdouble) __attribute__((weak, alias(str_xfmod     )));
+EXPORT CONST VECTOR_CC vdouble __remainder_finite(vdouble, vdouble) __attribute__((weak, alias(str_xremainder)));
+EXPORT CONST VECTOR_CC vdouble __modf_finite     (vdouble, vdouble *)
__attribute__((weak, alias(str_xmodf ))); +EXPORT CONST VECTOR_CC vdouble __hypot_u05_finite(vdouble, vdouble) __attribute__((weak, alias(str_xhypot_u05))); +EXPORT CONST VECTOR_CC vdouble __lgamma_u1_finite(vdouble) __attribute__((weak, alias(str_xlgamma_u1))); +EXPORT CONST VECTOR_CC vdouble __log10_finite (vdouble) __attribute__((weak, alias(str_xlog10 ))); +EXPORT CONST VECTOR_CC vdouble __log_finite (vdouble) __attribute__((weak, alias(str_xlog_u1 ))); +EXPORT CONST VECTOR_CC vdouble __pow_finite (vdouble, vdouble) __attribute__((weak, alias(str_xpow ))); +EXPORT CONST VECTOR_CC vdouble __sinh_finite (vdouble) __attribute__((weak, alias(str_xsinh ))); +EXPORT CONST VECTOR_CC vdouble __sqrt_finite (vdouble) __attribute__((weak, alias(str_xsqrt ))); +EXPORT CONST VECTOR_CC vdouble __tgamma_u1_finite(vdouble) __attribute__((weak, alias(str_xtgamma_u1))); + +#ifdef HEADER_MASKED +#include HEADER_MASKED +#endif +#endif /* #ifdef ENABLE_GNUABI */ diff --git a/src/sleefsimddp_emulation.c b/src/sleefsimddp_emulation.c new file mode 100644 index 00000000..0ec0d6e4 --- /dev/null +++ b/src/sleefsimddp_emulation.c @@ -0,0 +1,580 @@ +/* + +Copyright (c) 2021 Agenium Scale + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+
+*/
+
+#include <nsimd/nsimd.h> /* assumption: header name was lost in the diff; NSIMD's main header */
+
+#ifdef ENABLE_NEON32
+#include "renameneon32.h"
+#define nsimd_vec_f64 nsimd_neon128_vf64
+#endif
+
+#ifdef ENABLE_VSX
+#include "renamevsx.h"
+#define nsimd_vec_f64 nsimd_vmx_vf64
+#endif
+
+/* Each wrapper below copies the two 64-bit lanes of the target vector
+   type into nsimd's cpu emulation type, calls the corresponding
+   nsimd_*_cpu_f64 routine, and copies the lanes back. */
+
+nsimd_vec_f64 xsin(nsimd_vec_f64 a0_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  ret = nsimd_sin_u35_cpu_f64(a0);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
+nsimd_vec_f64 xcos(nsimd_vec_f64 a0_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  ret = nsimd_cos_u35_cpu_f64(a0);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
+nsimd_vec_f64 xtan(nsimd_vec_f64 a0_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  ret = nsimd_tan_u35_cpu_f64(a0);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
+nsimd_vec_f64 xasin(nsimd_vec_f64 a0_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  ret = nsimd_asin_u35_cpu_f64(a0);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
+nsimd_vec_f64 xacos(nsimd_vec_f64 a0_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  ret = nsimd_acos_u35_cpu_f64(a0);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
+nsimd_vec_f64 xatan(nsimd_vec_f64 a0_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  ret = nsimd_atan_u35_cpu_f64(a0);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
+nsimd_vec_f64 xatan2(nsimd_vec_f64 a0_, nsimd_vec_f64 a1_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, a1, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  a1.v0 = a1_.v0;
+  a1.v1 = a1_.v1;
+  ret = nsimd_atan2_u35_cpu_f64(a0, a1);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
+nsimd_vec_f64 xlog(nsimd_vec_f64 a0_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  ret = nsimd_log_u35_cpu_f64(a0);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
+nsimd_vec_f64 xcbrt(nsimd_vec_f64 a0_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  ret = nsimd_cbrt_u35_cpu_f64(a0);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
+nsimd_vec_f64 xsin_u1(nsimd_vec_f64 a0_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  ret = nsimd_sin_u10_cpu_f64(a0);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
+nsimd_vec_f64 xcos_u1(nsimd_vec_f64 a0_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  ret = nsimd_cos_u10_cpu_f64(a0);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
+nsimd_vec_f64 xtan_u1(nsimd_vec_f64 a0_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  ret = nsimd_tan_u10_cpu_f64(a0);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
+nsimd_vec_f64 xasin_u1(nsimd_vec_f64 a0_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  ret = nsimd_asin_u10_cpu_f64(a0);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
+nsimd_vec_f64 xacos_u1(nsimd_vec_f64 a0_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  ret = nsimd_acos_u10_cpu_f64(a0);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
+nsimd_vec_f64 xatan_u1(nsimd_vec_f64 a0_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  ret = nsimd_atan_u10_cpu_f64(a0);
+ ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xatan2_u1(nsimd_vec_f64 a0_, nsimd_vec_f64 a1_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, a1, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + a1.v0 = a1_.v0; + a1.v1 = a1_.v1; + ret = nsimd_atan2_u10_cpu_f64(a0, a1); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xlog_u1(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_log_u10_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xcbrt_u1(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_cbrt_u10_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xexp(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_exp_u10_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xpow(nsimd_vec_f64 a0_, nsimd_vec_f64 a1_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, a1, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + a1.v0 = a1_.v0; + a1.v1 = a1_.v1; + ret = nsimd_pow_u10_cpu_f64(a0, a1); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xsinh(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_sinh_u10_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xcosh(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_cosh_u10_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xtanh(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_tanh_u10_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xsinh_u35(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_sinh_u35_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xcosh_u35(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_cosh_u35_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xtanh_u35(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_tanh_u35_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xasinh(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_asinh_u10_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xacosh(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_acosh_u10_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xatanh(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_atanh_u10_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xexp2(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_exp2_u10_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 
xexp2_u35(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_exp2_u35_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xexp10(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_exp10_u10_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xexp10_u35(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_exp10_u35_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xexpm1(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_expm1_u10_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xlog10(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_log10_u10_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xlog2(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_log2_u10_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xlog2_u35(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_log2_u35_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xlog1p(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_log1p_u10_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xsinpi_u05(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_sinpi_u05_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xcospi_u05(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = nsimd_cospi_u05_cpu_f64(a0); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xhypot_u05(nsimd_vec_f64 a0_, nsimd_vec_f64 a1_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, a1, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + a1.v0 = a1_.v0; + a1.v1 = a1_.v1; + ret = nsimd_hypot_u05_cpu_f64(a0, a1); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xhypot_u35(nsimd_vec_f64 a0_, nsimd_vec_f64 a1_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, a1, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + a1.v0 = a1_.v0; + a1.v1 = a1_.v1; + ret = nsimd_hypot_u35_cpu_f64(a0, a1); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xfmod(nsimd_vec_f64 a0_, nsimd_vec_f64 a1_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, a1, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + a1.v0 = a1_.v0; + a1.v1 = a1_.v1; + ret = nsimd_fmod_cpu_f64(a0, a1); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xremainder(nsimd_vec_f64 a0_, nsimd_vec_f64 a1_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, a1, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + a1.v0 = a1_.v0; + a1.v1 = a1_.v1; + ret = nsimd_remainder_cpu_f64(a0, a1); + ret_.v0 = ret.v0; + ret_.v1 = ret.v1; + return ret_; +} + +nsimd_vec_f64 xlgamma_u1(nsimd_vec_f64 a0_) { + nsimd_vec_f64 ret_; + nsimd_cpu_vf64 a0, ret; + a0.v0 = a0_.v0; + a0.v1 = a0_.v1; + ret = 
nsimd_lgamma_u10_cpu_f64(a0);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
+nsimd_vec_f64 xtgamma_u1(nsimd_vec_f64 a0_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  ret = nsimd_tgamma_u10_cpu_f64(a0);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
+nsimd_vec_f64 xerf_u1(nsimd_vec_f64 a0_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  ret = nsimd_erf_u10_cpu_f64(a0);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
+nsimd_vec_f64 xerfc_u15(nsimd_vec_f64 a0_) {
+  nsimd_vec_f64 ret_;
+  nsimd_cpu_vf64 a0, ret;
+  a0.v0 = a0_.v0;
+  a0.v1 = a0_.v1;
+  ret = nsimd_erfc_u15_cpu_f64(a0);
+  ret_.v0 = ret.v0;
+  ret_.v1 = ret.v1;
+  return ret_;
+}
+
diff --git a/src/sleefsimdsp.c b/src/sleefsimdsp.c
new file mode 100644
index 00000000..aedd1ed1
--- /dev/null
+++ b/src/sleefsimdsp.c
@@ -0,0 +1,3546 @@
+// Copyright Naoki Shibata and contributors 2010 - 2020.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+// Always use -ffp-contract=off option to compile SLEEF.
+
+#if !defined(SLEEF_GENHEADER)
+#include <assert.h> /* assumption: the four header names were lost in the diff; these are the standard headers this file relies on */
+#include <stdint.h>
+#include <limits.h>
+#include <math.h>
+#endif
+
+#include "misc.h"
+
+extern const float Sleef_rempitabsp[];
+
+#define __SLEEFSIMDSP_C__
+
+#if (defined(_MSC_VER))
+#pragma fp_contract (off)
+#endif
+
+// Intel
+
+#ifdef ENABLE_SSE2
+#define CONFIG 2
+#if !defined(SLEEF_GENHEADER)
+#include "helpersse2.h"
+#else
+#include "macroonlySSE2.h"
+#endif
+#ifdef DORENAME
+#ifdef ENABLE_GNUABI
+#include "renamesse2_gnuabi.h"
+#else
+#include "renamesse2.h"
+#endif
+#endif
+#endif
+
+#ifdef ENABLE_SSE4
+#define CONFIG 4
+#if !defined(SLEEF_GENHEADER)
+#include "helpersse2.h"
+#else
+#include "macroonlySSE4.h"
+#endif
+#ifdef DORENAME
+#include "renamesse4.h"
+#endif
+#endif
+
+#ifdef ENABLE_AVX
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#include "helperavx.h"
+#else
+#include "macroonlyAVX.h"
+#endif
+#ifdef DORENAME
+#ifdef ENABLE_GNUABI
+#include "renameavx_gnuabi.h"
+#else
+#include "renameavx.h"
+#endif
+#endif
+#endif
+
+#ifdef ENABLE_FMA4
+#define CONFIG 4
+#if !defined(SLEEF_GENHEADER)
+#include "helperavx.h"
+#else
+#include "macroonlyFMA4.h"
+#endif
+#ifdef DORENAME
+#ifdef ENABLE_GNUABI
+#include "renamefma4_gnuabi.h"
+#else
+#include "renamefma4.h"
+#endif
+#endif
+#endif
+
+#ifdef ENABLE_AVX2
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#include "helperavx2.h"
+#else
+#include "macroonlyAVX2.h"
+#endif
+#ifdef DORENAME
+#ifdef ENABLE_GNUABI
+#include "renameavx2_gnuabi.h"
+#else
+#include "renameavx2.h"
+#endif
+#endif
+#endif
+
+#ifdef ENABLE_AVX2128
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#include "helperavx2_128.h"
+#else
+#include "macroonlyAVX2128.h"
+#endif
+#ifdef DORENAME
+#include "renameavx2128.h"
+#endif
+#endif
+
+#ifdef ENABLE_AVX512F
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#include "helperavx512f.h"
+#else
+#include "macroonlyAVX512F.h"
+#endif
+#ifdef DORENAME
+#ifdef ENABLE_GNUABI
+#include "renameavx512f_gnuabi.h"
+#else
+#include "renameavx512f.h"
+#endif
+#endif
+#endif
+
+#ifdef ENABLE_AVX512FNOFMA
+#define CONFIG 2
+#if !defined(SLEEF_GENHEADER)
+#include "helperavx512f.h"
+#else
+#include "macroonlyAVX512FNOFMA.h"
+#endif
+#ifdef DORENAME
+#include "renameavx512fnofma.h"
+#endif
+#endif
+
+// Arm
+
+#ifdef ENABLE_ADVSIMD
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#include "helperadvsimd.h"
+#else
+#include 
"macroonlyADVSIMD.h" +#endif +#ifdef DORENAME +#ifdef ENABLE_GNUABI +#include "renameadvsimd_gnuabi.h" +#else +#include "renameadvsimd.h" +#endif +#endif +#endif + +#ifdef ENABLE_ADVSIMDNOFMA +#define CONFIG 2 +#if !defined(SLEEF_GENHEADER) +#include "helperadvsimd.h" +#else +#include "macroonlyADVSIMDNOFMA.h" +#endif +#ifdef DORENAME +#include "renameadvsimdnofma.h" +#endif +#endif + +#ifdef ENABLE_NEON32 +#define CONFIG 1 +#if !defined(SLEEF_GENHEADER) +#include "helperneon32.h" +#endif +#ifdef DORENAME +#include "renameneon32.h" +#endif +#endif + +#ifdef ENABLE_NEON32VFPV4 +#define CONFIG 4 +#if !defined(SLEEF_GENHEADER) +#include "helperneon32.h" +#endif +#ifdef DORENAME +#include "renameneon32vfpv4.h" +#endif +#endif + +#ifdef ENABLE_SVE +#define CONFIG 1 +#if !defined(SLEEF_GENHEADER) +#include "helpersve.h" +#else +#include "macroonlySVE.h" +#endif +#ifdef DORENAME +#ifdef ENABLE_GNUABI +#include "renamesve_gnuabi.h" +#else +#include "renamesve.h" +#endif /* ENABLE_GNUABI */ +#endif /* DORENAME */ +#endif /* ENABLE_SVE */ + +#ifdef ENABLE_SVENOFMA +#define CONFIG 2 +#if !defined(SLEEF_GENHEADER) +#include "helpersve.h" +#else +#include "macroonlySVENOFMA.h" +#endif +#ifdef DORENAME +#include "renamesvenofma.h" +#endif /* DORENAME */ +#endif /* ENABLE_SVE */ + +// IBM + +#ifdef ENABLE_VSX +#define CONFIG 1 +#if !defined(SLEEF_GENHEADER) +#include "helperpower_128.h" +#else +#include "macroonlyVSX.h" +#endif +#ifdef DORENAME +#include "renamevsx.h" +#endif +#endif + +#ifdef ENABLE_VSXNOFMA +#define CONFIG 2 +#if !defined(SLEEF_GENHEADER) +#include "helperpower_128.h" +#else +#include "macroonlyVSXNOFMA.h" +#endif +#ifdef DORENAME +#include "renamevsxnofma.h" +#endif +#endif + +#ifdef ENABLE_ZVECTOR2 +#define CONFIG 140 +#if !defined(SLEEF_GENHEADER) +#include "helpers390x_128.h" +#else +#include "macroonlyZVECTOR2.h" +#endif +#ifdef DORENAME +#include "renamezvector2.h" +#endif +#endif + +#ifdef ENABLE_ZVECTOR2NOFMA +#define CONFIG 141 +#if !defined(SLEEF_GENHEADER) +#include "helpers390x_128.h" +#else +#include "macroonlyZVECTOR2NOFMA.h" +#endif +#ifdef DORENAME +#include "renamezvector2nofma.h" +#endif +#endif + +// Generic + +#ifdef ENABLE_VECEXT +#define CONFIG 1 +#if !defined(SLEEF_GENHEADER) +#include "helpervecext.h" +#endif +#ifdef DORENAME +#include "renamevecext.h" +#endif +#endif + +#ifdef ENABLE_PUREC +#define CONFIG 1 +#if !defined(SLEEF_GENHEADER) +#include "helperpurec.h" +#endif +#ifdef DORENAME +#include "renamepurec.h" +#endif +#endif + +#ifdef ENABLE_PUREC_SCALAR +#define CONFIG 1 +#if !defined(SLEEF_GENHEADER) +#include "helperpurec_scalar.h" +#else +#include "macroonlyPUREC_SCALAR.h" +#endif +#ifdef DORENAME +#include "renamepurec_scalar.h" +#endif +#endif + +#ifdef ENABLE_PURECFMA_SCALAR +#define CONFIG 2 +#if !defined(SLEEF_GENHEADER) +#include "helperpurec_scalar.h" +#else +#include "macroonlyPURECFMA_SCALAR.h" +#endif +#ifdef DORENAME +#include "renamepurecfma_scalar.h" +#endif +#endif + +// + +#define MLA(x, y, z) vmla_vf_vf_vf_vf((x), (y), (z)) +#define C2V(c) vcast_vf_f(c) +#include "estrin.h" + +// + +#include "df.h" + +static INLINE CONST VECTOR_CC vopmask visnegzero_vo_vf(vfloat d) { + return veq_vo_vi2_vi2(vreinterpret_vi2_vf(d), vreinterpret_vi2_vf(vcast_vf_f(-0.0))); +} + +static INLINE VECTOR_CC vopmask vnot_vo32_vo32(vopmask x) { + return vxor_vo_vo_vo(x, veq_vo_vi2_vi2(vcast_vi2_i(0), vcast_vi2_i(0))); +} + +static INLINE CONST VECTOR_CC vmask vsignbit_vm_vf(vfloat f) { + return vand_vm_vm_vm(vreinterpret_vm_vf(f), 
vreinterpret_vm_vf(vcast_vf_f(-0.0f))); +} + +static INLINE CONST VECTOR_CC vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) { + return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(x), vsignbit_vm_vf(y))); +} + +static INLINE CONST VECTOR_CC vfloat vcopysign_vf_vf_vf(vfloat x, vfloat y) { + return vreinterpret_vf_vm(vxor_vm_vm_vm(vandnot_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(x)), + vand_vm_vm_vm (vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(y)))); +} + +static INLINE CONST VECTOR_CC vfloat vsign_vf_vf(vfloat f) { + return vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(1.0f)), vand_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f)))); +} + +static INLINE CONST VECTOR_CC vopmask vsignbit_vo_vf(vfloat d) { + return veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vreinterpret_vi2_vf(d), vcast_vi2_i(0x80000000)), vcast_vi2_i(0x80000000)); +} + +static INLINE CONST VECTOR_CC vint2 vsel_vi2_vf_vf_vi2_vi2(vfloat f0, vfloat f1, vint2 x, vint2 y) { + return vsel_vi2_vo_vi2_vi2(vlt_vo_vf_vf(f0, f1), x, y); +} + +static INLINE CONST VECTOR_CC vint2 vsel_vi2_vf_vi2(vfloat d, vint2 x) { + return vand_vi2_vo_vi2(vsignbit_vo_vf(d), x); +} + +static INLINE CONST VECTOR_CC vopmask visint_vo_vf(vfloat y) { return veq_vo_vf_vf(vtruncate_vf_vf(y), y); } + +static INLINE CONST VECTOR_CC vopmask visnumber_vo_vf(vfloat x) { return vnot_vo32_vo32(vor_vo_vo_vo(visinf_vo_vf(x), visnan_vo_vf(x))); } + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) +static INLINE CONST VECTOR_CC vint2 vilogbk_vi2_vf(vfloat d) { + vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(5.421010862427522E-20f)); + d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(vcast_vf_f(1.8446744073709552E19f), d), d); + vint2 q = vand_vi2_vi2_vi2(vsrl_vi2_vi2_i(vreinterpret_vi2_vf(d), 23), vcast_vi2_i(0xff)); + q = vsub_vi2_vi2_vi2(q, vsel_vi2_vo_vi2_vi2(o, vcast_vi2_i(64 + 0x7f), vcast_vi2_i(0x7f))); + return q; +} + +static INLINE CONST VECTOR_CC vint2 vilogb2k_vi2_vf(vfloat d) { + vint2 q = vreinterpret_vi2_vf(d); + q = vsrl_vi2_vi2_i(q, 23); + q = vand_vi2_vi2_vi2(q, vcast_vi2_i(0xff)); + q = vsub_vi2_vi2_vi2(q, vcast_vi2_i(0x7f)); + return q; +} +#endif + +// + +EXPORT CONST VECTOR_CC vint2 xilogbf(vfloat d) { + vint2 e = vilogbk_vi2_vf(vabs_vf_vf(d)); + e = vsel_vi2_vo_vi2_vi2(veq_vo_vf_vf(d, vcast_vf_f(0.0f)), vcast_vi2_i(SLEEF_FP_ILOGB0), e); + e = vsel_vi2_vo_vi2_vi2(visnan_vo_vf(d), vcast_vi2_i(SLEEF_FP_ILOGBNAN), e); + e = vsel_vi2_vo_vi2_vi2(visinf_vo_vf(d), vcast_vi2_i(INT_MAX), e); + return e; +} + +static INLINE CONST VECTOR_CC vfloat vpow2i_vf_vi2(vint2 q) { + return vreinterpret_vf_vi2(vsll_vi2_vi2_i(vadd_vi2_vi2_vi2(q, vcast_vi2_i(0x7f)), 23)); +} + +static INLINE CONST VECTOR_CC vfloat vldexp_vf_vf_vi2(vfloat x, vint2 q) { + vfloat u; + vint2 m = vsra_vi2_vi2_i(q, 31); + m = vsll_vi2_vi2_i(vsub_vi2_vi2_vi2(vsra_vi2_vi2_i(vadd_vi2_vi2_vi2(m, q), 6), m), 4); + q = vsub_vi2_vi2_vi2(q, vsll_vi2_vi2_i(m, 2)); + m = vadd_vi2_vi2_vi2(m, vcast_vi2_i(0x7f)); + m = vand_vi2_vi2_vi2(vgt_vi2_vi2_vi2(m, vcast_vi2_i(0)), m); + vint2 n = vgt_vi2_vi2_vi2(m, vcast_vi2_i(0xff)); + m = vor_vi2_vi2_vi2(vandnot_vi2_vi2_vi2(n, m), vand_vi2_vi2_vi2(n, vcast_vi2_i(0xff))); + u = vreinterpret_vf_vi2(vsll_vi2_vi2_i(m, 23)); + x = vmul_vf_vf_vf(vmul_vf_vf_vf(vmul_vf_vf_vf(vmul_vf_vf_vf(x, u), u), u), u); + u = vreinterpret_vf_vi2(vsll_vi2_vi2_i(vadd_vi2_vi2_vi2(q, vcast_vi2_i(0x7f)), 23)); + return vmul_vf_vf_vf(x, u); +} + +static INLINE CONST VECTOR_CC vfloat vldexp2_vf_vf_vi2(vfloat d, vint2 e) { + 
return vmul_vf_vf_vf(vmul_vf_vf_vf(d, vpow2i_vf_vi2(vsra_vi2_vi2_i(e, 1))), vpow2i_vf_vi2(vsub_vi2_vi2_vi2(e, vsra_vi2_vi2_i(e, 1)))); +} + +static INLINE CONST VECTOR_CC vfloat vldexp3_vf_vf_vi2(vfloat d, vint2 q) { + return vreinterpret_vf_vi2(vadd_vi2_vi2_vi2(vreinterpret_vi2_vf(d), vsll_vi2_vi2_i(q, 23))); +} + +EXPORT CONST VECTOR_CC vfloat xldexpf(vfloat x, vint2 q) { return vldexp_vf_vf_vi2(x, q); } + +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +typedef struct { + vfloat d; + vint2 i; +} fi_t; + +static vfloat figetd_vf_di(fi_t d) { return d.d; } +static vint2 figeti_vi2_di(fi_t d) { return d.i; } +static fi_t fisetdi_fi_vf_vi2(vfloat d, vint2 i) { + fi_t r = { d, i }; + return r; +} + +typedef struct { + vfloat2 df; + vint2 i; +} dfi_t; + +static vfloat2 dfigetdf_vf2_dfi(dfi_t d) { return d.df; } +static vint2 dfigeti_vi2_dfi(dfi_t d) { return d.i; } +static dfi_t dfisetdfi_dfi_vf2_vi2(vfloat2 v, vint2 i) { + dfi_t r = { v, i }; + return r; +} +static dfi_t dfisetdf_dfi_dfi_vf2(dfi_t dfi, vfloat2 v) { + dfi.df = v; + return dfi; +} +#endif + +static INLINE CONST VECTOR_CC vfloat vorsign_vf_vf_vf(vfloat x, vfloat y) { + return vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(x), vsignbit_vm_vf(y))); +} + +static INLINE CONST fi_t rempisubf(vfloat x) { +#ifdef FULL_FP_ROUNDING + vfloat y = vrint_vf_vf(vmul_vf_vf_vf(x, vcast_vf_f(4))); + vint2 vi = vtruncate_vi2_vf(vsub_vf_vf_vf(y, vmul_vf_vf_vf(vrint_vf_vf(x), vcast_vf_f(4)))); + return fisetdi_fi_vf_vi2(vsub_vf_vf_vf(x, vmul_vf_vf_vf(y, vcast_vf_f(0.25))), vi); +#else + vfloat c = vmulsign_vf_vf_vf(vcast_vf_f(1 << 23), x); + vfloat rint4x = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vabs_vf_vf(vmul_vf_vf_vf(vcast_vf_f(4), x)), vcast_vf_f(1 << 23)), + vmul_vf_vf_vf(vcast_vf_f(4), x), + vorsign_vf_vf_vf(vsub_vf_vf_vf(vmla_vf_vf_vf_vf(vcast_vf_f(4), x, c), c), x)); + vfloat rintx = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(1 << 23)), + x, vorsign_vf_vf_vf(vsub_vf_vf_vf(vadd_vf_vf_vf(x, c), c), x)); + return fisetdi_fi_vf_vi2(vmla_vf_vf_vf_vf(vcast_vf_f(-0.25), rint4x, x), + vtruncate_vi2_vf(vmla_vf_vf_vf_vf(vcast_vf_f(-4), rintx, rint4x))); +#endif +} + +static INLINE CONST dfi_t rempif(vfloat a) { + vfloat2 x, y, z; + vint2 ex = vilogb2k_vi2_vf(a); +#if defined(ENABLE_AVX512F) || defined(ENABLE_AVX512FNOFMA) + ex = vandnot_vi2_vi2_vi2(vsra_vi2_vi2_i(ex, 31), ex); + ex = vand_vi2_vi2_vi2(ex, vcast_vi2_i(127)); +#endif + ex = vsub_vi2_vi2_vi2(ex, vcast_vi2_i(25)); + vint2 q = vand_vi2_vo_vi2(vgt_vo_vi2_vi2(ex, vcast_vi2_i(90-25)), vcast_vi2_i(-64)); + a = vldexp3_vf_vf_vi2(a, q); + ex = vandnot_vi2_vi2_vi2(vsra_vi2_vi2_i(ex, 31), ex); + ex = vsll_vi2_vi2_i(ex, 2); + x = dfmul_vf2_vf_vf(a, vgather_vf_p_vi2(Sleef_rempitabsp, ex)); + fi_t di = rempisubf(vf2getx_vf_vf2(x)); + q = figeti_vi2_di(di); + x = vf2setx_vf2_vf2_vf(x, figetd_vf_di(di)); + x = dfnormalize_vf2_vf2(x); + y = dfmul_vf2_vf_vf(a, vgather_vf_p_vi2(Sleef_rempitabsp+1, ex)); + x = dfadd2_vf2_vf2_vf2(x, y); + di = rempisubf(vf2getx_vf_vf2(x)); + q = vadd_vi2_vi2_vi2(q, figeti_vi2_di(di)); + x = vf2setx_vf2_vf2_vf(x, figetd_vf_di(di)); + x = dfnormalize_vf2_vf2(x); + y = vcast_vf2_vf_vf(vgather_vf_p_vi2(Sleef_rempitabsp+2, ex), vgather_vf_p_vi2(Sleef_rempitabsp+3, ex)); + y = dfmul_vf2_vf2_vf(y, a); + x = dfadd2_vf2_vf2_vf2(x, y); + x = dfnormalize_vf2_vf2(x); + x = dfmul_vf2_vf2_vf2(x, vcast_vf2_f_f(3.1415927410125732422f*2, -8.7422776573475857731e-08f*2)); + x = vsel_vf2_vo_vf2_vf2(vlt_vo_vf_vf(vabs_vf_vf(a), vcast_vf_f(0.7f)), vcast_vf2_vf_vf(a, 
vcast_vf_f(0)), x); + return dfisetdfi_dfi_vf2_vi2(x, q); +} + +EXPORT CONST VECTOR_CC vfloat xsinf(vfloat d) { +#if !defined(DETERMINISTIC) + vint2 q; + vfloat u, s, r = d; + + if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { + q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI))); + u = vcast_vf_vi2(q); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f), d); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f), d); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f), d); + } else if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAXf))))) { + q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI))); + u = vcast_vf_vi2(q); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Af), d); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Bf), d); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Cf), d); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Df), d); + } else { + dfi_t dfi = rempif(d); + q = vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(3)); + q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vi2_i(2), vcast_vi2_i(1))); + q = vsra_vi2_vi2_i(q, 2); + vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(1)), vcast_vi2_i(1)); + vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi))), + vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)))); + x = dfadd2_vf2_vf2_vf2(dfigetdf_vf2_dfi(dfi), x); + dfi = dfisetdf_dfi_dfi_vf2(dfi, vsel_vf2_vo_vf2_vf2(o, x, dfigetdf_vf2_dfi(dfi))); + d = vadd_vf_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vf2gety_vf_vf2(dfigetdf_vf2_dfi(dfi))); + + d = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(r), visnan_vo_vf(r)), vreinterpret_vm_vf(d))); + } + + s = vmul_vf_vf_vf(d, d); + + d = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(d))); + + u = vcast_vf_f(2.6083159809786593541503e-06f); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.0001981069071916863322258f)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833307858556509017944336f)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666597127914428710938f)); + + u = vadd_vf_vf_vf(vmul_vf_vf_vf(s, vmul_vf_vf_vf(u, d)), d); + + u = vsel_vf_vo_vf_vf(visnegzero_vo_vf(r), r, u); + + return u; + +#else // #if !defined(DETERMINISTIC) + + vint2 q; + vfloat u, s, r = d; + + q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI))); + u = vcast_vf_vi2(q); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f), d); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f), d); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f), d); + vopmask g = vlt_vo_vf_vf(vabs_vf_vf(r), vcast_vf_f(TRIGRANGEMAX2f)); + + if (!LIKELY(vtestallones_i_vo32(g))) { + s = vcast_vf_vi2(q); + u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Af), r); + u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Bf), u); + u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Cf), u); + u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Df), u); + + d = vsel_vf_vo_vf_vf(g, d, u); + g = vlt_vo_vf_vf(vabs_vf_vf(r), vcast_vf_f(TRIGRANGEMAXf)); + + if (!LIKELY(vtestallones_i_vo32(g))) { + dfi_t dfi = rempif(r); + vint2 q2 = vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(3)); + q2 = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q2, q2), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vi2_i(2), 
vcast_vi2_i(1))); + q2 = vsra_vi2_vi2_i(q2, 2); + vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(1)), vcast_vi2_i(1)); + vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi))), + vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)))); + x = dfadd2_vf2_vf2_vf2(dfigetdf_vf2_dfi(dfi), x); + dfi = dfisetdf_dfi_dfi_vf2(dfi, vsel_vf2_vo_vf2_vf2(o, x, dfigetdf_vf2_dfi(dfi))); + u = vadd_vf_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vf2gety_vf_vf2(dfigetdf_vf2_dfi(dfi))); + + u = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(r), visnan_vo_vf(r)), vreinterpret_vm_vf(u))); + + q = vsel_vi2_vo_vi2_vi2(g, q, q2); + d = vsel_vf_vo_vf_vf(g, d, u); + } + } + + s = vmul_vf_vf_vf(d, d); + + d = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(d))); + + u = vcast_vf_f(2.6083159809786593541503e-06f); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.0001981069071916863322258f)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833307858556509017944336f)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666597127914428710938f)); + + u = vadd_vf_vf_vf(vmul_vf_vf_vf(s, vmul_vf_vf_vf(u, d)), d); + + u = vsel_vf_vo_vf_vf(visnegzero_vo_vf(r), r, u); + + return u; +#endif // #if !defined(DETERMINISTIC) +} + +EXPORT CONST VECTOR_CC vfloat xcosf(vfloat d) { +#if !defined(DETERMINISTIC) + vint2 q; + vfloat u, s, r = d; + + if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { + q = vrint_vi2_vf(vsub_vf_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI)), vcast_vf_f(0.5f))); + q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vcast_vi2_i(1)); + + u = vcast_vf_vi2(q); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), d); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), d); + } else if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAXf))))) { + q = vrint_vi2_vf(vsub_vf_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI)), vcast_vf_f(0.5f))); + q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vcast_vi2_i(1)); + + u = vcast_vf_vi2(q); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Af*0.5f), d); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Bf*0.5f), d); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Cf*0.5f), d); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Df*0.5f), d); + } else { + dfi_t dfi = rempif(d); + q = vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(3)); + q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vi2_i(8), vcast_vi2_i(7))); + q = vsra_vi2_vi2_i(q, 1); + vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(1)), vcast_vi2_i(0)); + vfloat y = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vf_f(0), vcast_vf_f(-1)); + vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), y), + vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), y)); + x = dfadd2_vf2_vf2_vf2(dfigetdf_vf2_dfi(dfi), x); + dfi = dfisetdf_dfi_dfi_vf2(dfi, vsel_vf2_vo_vf2_vf2(o, x, dfigetdf_vf2_dfi(dfi))); + d = vadd_vf_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vf2gety_vf_vf2(dfigetdf_vf2_dfi(dfi))); + + d = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(r), 
visnan_vo_vf(r)), vreinterpret_vm_vf(d))); + } + + s = vmul_vf_vf_vf(d, d); + + d = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(d))); + + u = vcast_vf_f(2.6083159809786593541503e-06f); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.0001981069071916863322258f)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833307858556509017944336f)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666597127914428710938f)); + + u = vadd_vf_vf_vf(vmul_vf_vf_vf(s, vmul_vf_vf_vf(u, d)), d); + + return u; + +#else // #if !defined(DETERMINISTIC) + + vint2 q; + vfloat u, s, r = d; + + q = vrint_vi2_vf(vsub_vf_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI)), vcast_vf_f(0.5f))); + q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vcast_vi2_i(1)); + u = vcast_vf_vi2(q); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), d); + d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), d); + vopmask g = vlt_vo_vf_vf(vabs_vf_vf(r), vcast_vf_f(TRIGRANGEMAX2f)); + + if (!LIKELY(vtestallones_i_vo32(g))) { + s = vcast_vf_vi2(q); + u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Af*0.5f), r); + u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Bf*0.5f), u); + u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Cf*0.5f), u); + u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Df*0.5f), u); + + d = vsel_vf_vo_vf_vf(g, d, u); + g = vlt_vo_vf_vf(vabs_vf_vf(r), vcast_vf_f(TRIGRANGEMAXf)); + + if (!LIKELY(vtestallones_i_vo32(g))) { + dfi_t dfi = rempif(r); + vint2 q2 = vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(3)); + q2 = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q2, q2), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vi2_i(8), vcast_vi2_i(7))); + q2 = vsra_vi2_vi2_i(q2, 1); + vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(1)), vcast_vi2_i(0)); + vfloat y = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vf_f(0), vcast_vf_f(-1)); + vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), y), + vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), y)); + x = dfadd2_vf2_vf2_vf2(dfigetdf_vf2_dfi(dfi), x); + dfi = dfisetdf_dfi_dfi_vf2(dfi, vsel_vf2_vo_vf2_vf2(o, x, dfigetdf_vf2_dfi(dfi))); + u = vadd_vf_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vf2gety_vf_vf2(dfigetdf_vf2_dfi(dfi))); + + u = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(r), visnan_vo_vf(r)), vreinterpret_vm_vf(u))); + + q = vsel_vi2_vo_vi2_vi2(g, q, q2); + d = vsel_vf_vo_vf_vf(g, d, u); + } + } + + s = vmul_vf_vf_vf(d, d); + + d = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(d))); + + u = vcast_vf_f(2.6083159809786593541503e-06f); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.0001981069071916863322258f)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833307858556509017944336f)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666597127914428710938f)); + + u = vadd_vf_vf_vf(vmul_vf_vf_vf(s, vmul_vf_vf_vf(u, d)), d); + + return u; +#endif // #if !defined(DETERMINISTIC) +} + +EXPORT CONST VECTOR_CC vfloat xtanf(vfloat d) { +#if !defined(DETERMINISTIC) + vint2 q; + vopmask o; + vfloat u, s, x; + + x = d; + + if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f*0.5f))))) { + q = vrint_vi2_vf(vmul_vf_vf_vf(d, 
vcast_vf_f((float)(2 * M_1_PI)))); + u = vcast_vf_vi2(q); + x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), x); + x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), x); + x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), x); + } else if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAXf))))) { + q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)(2 * M_1_PI)))); + u = vcast_vf_vi2(q); + x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Af*0.5f), x); + x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Bf*0.5f), x); + x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Cf*0.5f), x); + x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Df*0.5f), x); + } else { + dfi_t dfi = rempif(d); + q = dfigeti_vi2_dfi(dfi); + x = vadd_vf_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vf2gety_vf_vf2(dfigetdf_vf2_dfi(dfi))); + x = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(x))); + x = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), d, x); + } + + s = vmul_vf_vf_vf(x, x); + + o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)); + x = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(x))); + +#if defined(ENABLE_NEON32) + u = vcast_vf_f(0.00927245803177356719970703f); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00331984995864331722259521f)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0242998078465461730957031f)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0534495301544666290283203f)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.133383005857467651367188f)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.333331853151321411132812f)); +#else + vfloat s2 = vmul_vf_vf_vf(s, s), s4 = vmul_vf_vf_vf(s2, s2); + u = POLY6(s, s2, s4, + 0.00927245803177356719970703f, + 0.00331984995864331722259521f, + 0.0242998078465461730957031f, + 0.0534495301544666290283203f, + 0.133383005857467651367188f, + 0.333331853151321411132812f); +#endif + + u = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(u, x), x); + + u = vsel_vf_vo_vf_vf(o, vrec_vf_vf(u), u); + + return u; + +#else // #if !defined(DETERMINISTIC) + + vint2 q; + vopmask o; + vfloat u, s, x; + + q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)(2 * M_1_PI)))); + u = vcast_vf_vi2(q); + x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d); + x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), x); + x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), x); + vopmask g = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f*0.5f)); + + if (!LIKELY(vtestallones_i_vo32(g))) { + vint2 q2 = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)(2 * M_1_PI)))); + s = vcast_vf_vi2(q); + u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Af*0.5f), d); + u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Bf*0.5f), u); + u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Cf*0.5f), u); + u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Df*0.5f), u); + + q = vsel_vi2_vo_vi2_vi2(g, q, q2); + x = vsel_vf_vo_vf_vf(g, x, u); + g = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAXf)); + + if (!LIKELY(vtestallones_i_vo32(g))) { + dfi_t dfi = rempif(d); + u = vadd_vf_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vf2gety_vf_vf2(dfigetdf_vf2_dfi(dfi))); + u = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(u))); + u = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), d, u); + q = vsel_vi2_vo_vi2_vi2(g, q, dfigeti_vi2_dfi(dfi)); + x = vsel_vf_vo_vf_vf(g, x, u); + } + } + + s = vmul_vf_vf_vf(x, x); + + o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)); + x = 
vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(x))); + +#if defined(ENABLE_NEON32) + u = vcast_vf_f(0.00927245803177356719970703f); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00331984995864331722259521f)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0242998078465461730957031f)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0534495301544666290283203f)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.133383005857467651367188f)); + u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.333331853151321411132812f)); +#else + vfloat s2 = vmul_vf_vf_vf(s, s), s4 = vmul_vf_vf_vf(s2, s2); + u = POLY6(s, s2, s4, + 0.00927245803177356719970703f, + 0.00331984995864331722259521f, + 0.0242998078465461730957031f, + 0.0534495301544666290283203f, + 0.133383005857467651367188f, + 0.333331853151321411132812f); +#endif + + u = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(u, x), x); + + u = vsel_vf_vo_vf_vf(o, vrec_vf_vf(u), u); + + return u; +#endif // #if !defined(DETERMINISTIC) +} + +EXPORT CONST VECTOR_CC vfloat xsinf_u1(vfloat d) { +#if !defined(DETERMINISTIC) + vint2 q; + vfloat u, v; + vfloat2 s, t, x; + + if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { + u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(M_1_PI))); + q = vrint_vi2_vf(u); + v = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f), d); + s = dfadd2_vf2_vf_vf(v, vmul_vf_vf_vf(u, vcast_vf_f(-PI_B2f))); + s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI_C2f))); + } else { + dfi_t dfi = rempif(d); + q = vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(3)); + q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vi2_i(2), vcast_vi2_i(1))); + q = vsra_vi2_vi2_i(q, 2); + vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(1)), vcast_vi2_i(1)); + vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi))), + vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)))); + x = dfadd2_vf2_vf2_vf2(dfigetdf_vf2_dfi(dfi), x); + dfi = dfisetdf_dfi_dfi_vf2(dfi, vsel_vf2_vo_vf2_vf2(o, x, dfigetdf_vf2_dfi(dfi))); + s = dfnormalize_vf2_vf2(dfigetdf_vf2_dfi(dfi)); + +#if !defined(_MSC_VER) + s = vf2setx_vf2_vf2_vf(s, vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(vf2getx_vf_vf2(s))))); +#else + s.x = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(s.x))); +#endif + } + + t = s; + s = dfsqu_vf2_vf2(s); + + u = vcast_vf_f(2.6083159809786593541503e-06f); + u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.0001981069071916863322258f)); + u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.00833307858556509017944336f)); + + x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf(vcast_vf_f(-0.166666597127914428710938f), vmul_vf_vf_vf(u, vf2getx_vf_vf2(s))), s)); + + u = dfmul_vf_vf2_vf2(t, x); + + u = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(u))); + + u = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), d, u); + + return u; + +#else // #if !defined(DETERMINISTIC) + + vint2 q; + vfloat u, v; + vfloat2 s, t, x; + + u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(M_1_PI))); + q = vrint_vi2_vf(u); + v = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f), 
d); + s = dfadd2_vf2_vf_vf(v, vmul_vf_vf_vf(u, vcast_vf_f(-PI_B2f))); + s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI_C2f))); + vopmask g = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f)); + + if (!LIKELY(vtestallones_i_vo32(g))) { + dfi_t dfi = rempif(d); + vint2 q2 = vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(3)); + q2 = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q2, q2), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vi2_i(2), vcast_vi2_i(1))); + q2 = vsra_vi2_vi2_i(q2, 2); + vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(1)), vcast_vi2_i(1)); + vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi))), + vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)))); + x = dfadd2_vf2_vf2_vf2(dfigetdf_vf2_dfi(dfi), x); + dfi = dfisetdf_dfi_dfi_vf2(dfi, vsel_vf2_vo_vf2_vf2(o, x, dfigetdf_vf2_dfi(dfi))); + t = dfnormalize_vf2_vf2(dfigetdf_vf2_dfi(dfi)); + + t = vf2setx_vf2_vf2_vf(t, vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(vf2getx_vf_vf2(t))))); + + q = vsel_vi2_vo_vi2_vi2(g, q, q2); + s = vsel_vf2_vo_vf2_vf2(g, s, t); + } + + t = s; + s = dfsqu_vf2_vf2(s); + + u = vcast_vf_f(2.6083159809786593541503e-06f); + u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.0001981069071916863322258f)); + u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.00833307858556509017944336f)); + + x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf(vcast_vf_f(-0.166666597127914428710938f), vmul_vf_vf_vf(u, vf2getx_vf_vf2(s))), s)); + + u = dfmul_vf_vf2_vf2(t, x); + + u = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(u))); + + u = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), d, u); + + return u; +#endif // #if !defined(DETERMINISTIC) +} + +EXPORT CONST VECTOR_CC vfloat xcosf_u1(vfloat d) { +#if !defined(DETERMINISTIC) + vint2 q; + vfloat u; + vfloat2 s, t, x; + + if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { + vfloat dq = vmla_vf_vf_vf_vf(vrint_vf_vf(vmla_vf_vf_vf_vf(d, vcast_vf_f(M_1_PI), vcast_vf_f(-0.5f))), + vcast_vf_f(2), vcast_vf_f(1)); + q = vrint_vi2_vf(dq); + s = dfadd2_vf2_vf_vf (d, vmul_vf_vf_vf(dq, vcast_vf_f(-PI_A2f*0.5f))); + s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(dq, vcast_vf_f(-PI_B2f*0.5f))); + s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(dq, vcast_vf_f(-PI_C2f*0.5f))); + } else { + dfi_t dfi = rempif(d); + q = vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(3)); + q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vi2_i(8), vcast_vi2_i(7))); + q = vsra_vi2_vi2_i(q, 1); + vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(1)), vcast_vi2_i(0)); + vfloat y = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vf_f(0), vcast_vf_f(-1)); + vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), y), + vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), y)); + x = dfadd2_vf2_vf2_vf2(dfigetdf_vf2_dfi(dfi), x); + dfi = dfisetdf_dfi_dfi_vf2(dfi, vsel_vf2_vo_vf2_vf2(o, x, dfigetdf_vf2_dfi(dfi))); + s = dfnormalize_vf2_vf2(dfigetdf_vf2_dfi(dfi)); + +#if 
!defined(_MSC_VER)
+  s = vf2setx_vf2_vf2_vf(s, vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(vf2getx_vf_vf2(s)))));
+#else
+  s.x = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(s.x)));
+#endif
+  }
+
+  t = s;
+  s = dfsqu_vf2_vf2(s);
+
+  u = vcast_vf_f(2.6083159809786593541503e-06f);
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.0001981069071916863322258f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.00833307858556509017944336f));
+
+  x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf(vcast_vf_f(-0.166666597127914428710938f), vmul_vf_vf_vf(u, vf2getx_vf_vf2(s))), s));
+
+  u = dfmul_vf_vf2_vf2(t, x);
+
+  u = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)), vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(u)));
+
+  return u;
+
+#else // #if !defined(DETERMINISTIC)
+
+  vint2 q;
+  vfloat u;
+  vfloat2 s, t, x;
+
+  vfloat dq = vmla_vf_vf_vf_vf(vrint_vf_vf(vmla_vf_vf_vf_vf(d, vcast_vf_f(M_1_PI), vcast_vf_f(-0.5f))),
+                               vcast_vf_f(2), vcast_vf_f(1));
+  q = vrint_vi2_vf(dq);
+  s = dfadd2_vf2_vf_vf (d, vmul_vf_vf_vf(dq, vcast_vf_f(-PI_A2f*0.5f)));
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(dq, vcast_vf_f(-PI_B2f*0.5f)));
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(dq, vcast_vf_f(-PI_C2f*0.5f)));
+  vopmask g = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f));
+
+  if (!LIKELY(vtestallones_i_vo32(g))) {
+    dfi_t dfi = rempif(d);
+    vint2 q2 = vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(3));
+    q2 = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q2, q2), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vi2_i(8), vcast_vi2_i(7)));
+    q2 = vsra_vi2_vi2_i(q2, 1);
+    vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(1)), vcast_vi2_i(0));
+    vfloat y = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vf_f(0), vcast_vf_f(-1));
+    vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), y),
+                                vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), y));
+    x = dfadd2_vf2_vf2_vf2(dfigetdf_vf2_dfi(dfi), x);
+    dfi = dfisetdf_dfi_dfi_vf2(dfi, vsel_vf2_vo_vf2_vf2(o, x, dfigetdf_vf2_dfi(dfi)));
+    t = dfnormalize_vf2_vf2(dfigetdf_vf2_dfi(dfi));
+
+    t = vf2setx_vf2_vf2_vf(t, vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(vf2getx_vf_vf2(t)))));
+
+    q = vsel_vi2_vo_vi2_vi2(g, q, q2);
+    s = vsel_vf2_vo_vf2_vf2(g, s, t);
+  }
+
+  t = s;
+  s = dfsqu_vf2_vf2(s);
+
+  u = vcast_vf_f(2.6083159809786593541503e-06f);
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.0001981069071916863322258f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.00833307858556509017944336f));
+
+  x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf(vcast_vf_f(-0.166666597127914428710938f), vmul_vf_vf_vf(u, vf2getx_vf_vf2(s))), s));
+
+  u = dfmul_vf_vf2_vf2(t, x);
+
+  u = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)), vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(u)));
+
+  return u;
+#endif // #if !defined(DETERMINISTIC)
+}
+
+EXPORT CONST VECTOR_CC vfloat xfastsinf_u3500(vfloat d) {
+  vint2 q;
+  vfloat u, s, t = d;
+
+  s = vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI));
+  u = vrint_vf_vf(s);
+  q = vrint_vi2_vf(s);
+  d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-(float)M_PI), d);
+
+  s = vmul_vf_vf_vf(d, d);
+
+  u = vcast_vf_f(-0.1881748176e-3);
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.8323502727e-2));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.1666651368e+0));
+  u = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(s, d), u, d);
+
+  u = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(u)));
+
+  vopmask g = vlt_vo_vf_vf(vabs_vf_vf(t), vcast_vf_f(30.0f));
+  if (!LIKELY(vtestallones_i_vo32(g))) return vsel_vf_vo_vf_vf(g, u, xsinf(t));
+
+  return u;
+}
+
+EXPORT CONST VECTOR_CC vfloat xfastcosf_u3500(vfloat d) {
+  vint2 q;
+  vfloat u, s, t = d;
+
+  s = vmla_vf_vf_vf_vf(d, vcast_vf_f((float)M_1_PI), vcast_vf_f(-0.5f));
+  u = vrint_vf_vf(s);
+  q = vrint_vi2_vf(s);
+  d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-(float)M_PI), vsub_vf_vf_vf(d, vcast_vf_f((float)M_PI * 0.5f)));
+
+  s = vmul_vf_vf_vf(d, d);
+
+  u = vcast_vf_f(-0.1881748176e-3);
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.8323502727e-2));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.1666651368e+0));
+  u = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(s, d), u, d);
+
+  u = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(0)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(u)));
+
+  vopmask g = vlt_vo_vf_vf(vabs_vf_vf(t), vcast_vf_f(30.0f));
+  if (!LIKELY(vtestallones_i_vo32(g))) return vsel_vf_vo_vf_vf(g, u, xcosf(t));
+
+  return u;
+}
+
+#ifdef ENABLE_GNUABI
+#define TYPE2_FUNCATR static INLINE CONST
+#define TYPE6_FUNCATR static INLINE CONST
+#define SQRTFU05_FUNCATR static INLINE CONST
+#define XSINCOSF sincosfk
+#define XSINCOSF_U1 sincosfk_u1
+#define XSINCOSPIF_U05 sincospifk_u05
+#define XSINCOSPIF_U35 sincospifk_u35
+#define XMODFF modffk
+#else
+#define TYPE2_FUNCATR EXPORT CONST
+#define TYPE6_FUNCATR EXPORT
+#define SQRTFU05_FUNCATR EXPORT
+#define XSINCOSF xsincosf
+#define XSINCOSF_U1 xsincosf_u1
+#define XSINCOSPIF_U05 xsincospif_u05
+#define XSINCOSPIF_U35 xsincospif_u35
+#define XMODFF xmodff
+#endif
+
+TYPE2_FUNCATR VECTOR_CC vfloat2 XSINCOSF(vfloat d) {
+#if !defined(DETERMINISTIC)
+  vint2 q;
+  vopmask o;
+  vfloat u, s, t, rx, ry;
+  vfloat2 r;
+
+  s = d;
+
+  if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) {
+    q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_2_PI)));
+    u = vcast_vf_vi2(q);
+    s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), s);
+    s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), s);
+    s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), s);
+  } else if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAXf))))) {
+    q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_2_PI)));
+    u = vcast_vf_vi2(q);
+    s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Af*0.5f), s);
+    s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Bf*0.5f), s);
+    s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Cf*0.5f), s);
+    s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Df*0.5f), s);
+  } else {
+    dfi_t dfi = rempif(d);
+    q = dfigeti_vi2_dfi(dfi);
+    s = vadd_vf_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vf2gety_vf_vf2(dfigetdf_vf2_dfi(dfi)));
+    s = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(s)));
+  }
+
+  t = s;
+
+  s = vmul_vf_vf_vf(s, s);
+
+  u = vcast_vf_f(-0.000195169282960705459117889f);
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833215750753879547119141f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666537523269653320312f));
+
+  rx = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(u, s), t, t);
+  rx = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), rx);
+
+  u = vcast_vf_f(-2.71811842367242206819355e-07f);
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(2.47990446951007470488548e-05f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.00138888787478208541870117f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0416666641831398010253906f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.5));
+
+  ry = vmla_vf_vf_vf_vf(s, u, vcast_vf_f(1));
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(0));
+  r = vf2setxy_vf2_vf_vf(vsel_vf_vo_vf_vf(o, rx, ry), vsel_vf_vo_vf_vf(o, ry, rx));
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2));
+  r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2getx_vf_vf2(r)))));
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(2)), vcast_vi2_i(2));
+  r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2gety_vf_vf2(r)))));
+
+  return r;
+
+#else // #if !defined(DETERMINISTIC)
+
+  vint2 q;
+  vopmask o;
+  vfloat u, s, t, rx, ry;
+  vfloat2 r;
+
+  q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_2_PI)));
+  u = vcast_vf_vi2(q);
+  s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d);
+  s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), s);
+  s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), s);
+  vopmask g = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f));
+
+  if (!LIKELY(vtestallones_i_vo32(g))) {
+    vint2 q2 = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_2_PI)));
+    u = vcast_vf_vi2(q2);
+    t = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Af*0.5f), d);
+    t = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Bf*0.5f), t);
+    t = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Cf*0.5f), t);
+    t = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Df*0.5f), t);
+
+    q = vsel_vi2_vo_vi2_vi2(g, q, q2);
+    s = vsel_vf_vo_vf_vf(g, s, t);
+    g = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAXf));
+
+    if (!LIKELY(vtestallones_i_vo32(g))) {
+      dfi_t dfi = rempif(d);
+      t = vadd_vf_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vf2gety_vf_vf2(dfigetdf_vf2_dfi(dfi)));
+      t = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(t)));
+
+      q = vsel_vi2_vo_vi2_vi2(g, q, dfigeti_vi2_dfi(dfi));
+      s = vsel_vf_vo_vf_vf(g, s, t);
+    }
+  }
+
+  t = s;
+
+  s = vmul_vf_vf_vf(s, s);
+
+  u = vcast_vf_f(-0.000195169282960705459117889f);
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833215750753879547119141f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666537523269653320312f));
+
+  rx = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(u, s), t, t);
+  rx = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), rx);
+
+  u = vcast_vf_f(-2.71811842367242206819355e-07f);
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(2.47990446951007470488548e-05f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.00138888787478208541870117f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0416666641831398010253906f));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.5));
+
+  ry = vmla_vf_vf_vf_vf(s, u, vcast_vf_f(1));
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(0));
+  r = vf2setxy_vf2_vf_vf(vsel_vf_vo_vf_vf(o, rx, ry), vsel_vf_vo_vf_vf(o, ry, rx));
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2));
+  r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2getx_vf_vf2(r)))));
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(2)), vcast_vi2_i(2));
+  r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2gety_vf_vf2(r)))));
+
+  return r;
+#endif // #if !defined(DETERMINISTIC)
+}
+
+TYPE2_FUNCATR VECTOR_CC vfloat2 XSINCOSF_U1(vfloat d) {
+#if !defined(DETERMINISTIC)
+  vint2 q;
+  vopmask o;
+  vfloat u, v, rx, ry;
+  vfloat2 r, s, t, x;
+
+  if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) {
+    u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(2 * M_1_PI)));
+    q = vrint_vi2_vf(u);
+    v = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d);
+    s = dfadd2_vf2_vf_vf(v, vmul_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f)));
+    s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f)));
+  } else {
+    dfi_t dfi = rempif(d);
+    q = dfigeti_vi2_dfi(dfi);
+    s = dfigetdf_vf2_dfi(dfi);
+    o = vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d));
+    s = vf2setx_vf2_vf2_vf(s, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2getx_vf_vf2(s)))));
+  }
+
+  t = s;
+
+  s = vf2setx_vf2_vf2_vf(s, dfsqu_vf_vf2(s));
+
+  u = vcast_vf_f(-0.000195169282960705459117889f);
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.00833215750753879547119141f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.166666537523269653320312f));
+
+  u = vmul_vf_vf_vf(u, vmul_vf_vf_vf(vf2getx_vf_vf2(s), vf2getx_vf_vf2(t)));
+
+  x = dfadd_vf2_vf2_vf(t, u);
+  rx = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x));
+
+  rx = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), rx);
+
+  u = vcast_vf_f(-2.71811842367242206819355e-07f);
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(2.47990446951007470488548e-05f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.00138888787478208541870117f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0416666641831398010253906f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.5));
+
+  x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf_vf(vf2getx_vf_vf2(s), u));
+  ry = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x));
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(0));
+  r = vf2setxy_vf2_vf_vf(vsel_vf_vo_vf_vf(o, rx, ry), vsel_vf_vo_vf_vf(o, ry, rx));
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2));
+  r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2getx_vf_vf2(r)))));
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(2)), vcast_vi2_i(2));
+  r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2gety_vf_vf2(r)))));
+
+  return r;
+
+#else // #if !defined(DETERMINISTIC)
+
+  vint2 q;
+  vopmask o;
+  vfloat u, v, rx, ry;
+  vfloat2 r, s, t, x;
+
+  u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(2 * M_1_PI)));
+  q = vrint_vi2_vf(u);
+  v = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d);
+  s = dfadd2_vf2_vf_vf(v, vmul_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f)));
+  s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f)));
+  vopmask g = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f));
+
+  if (!LIKELY(vtestallones_i_vo32(g))) {
+    dfi_t dfi = rempif(d);
+    t = dfigetdf_vf2_dfi(dfi);
+    o = vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d));
+    t = vf2setx_vf2_vf2_vf(t, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2getx_vf_vf2(t)))));
+    q = vsel_vi2_vo_vi2_vi2(g, q, dfigeti_vi2_dfi(dfi));
+    s = vsel_vf2_vo_vf2_vf2(g, s, t);
+  }
+
+  t = s;
+
+  s = vf2setx_vf2_vf2_vf(s, dfsqu_vf_vf2(s));
+
+  u = vcast_vf_f(-0.000195169282960705459117889f);
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.00833215750753879547119141f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.166666537523269653320312f));
+
+  u = vmul_vf_vf_vf(u, vmul_vf_vf_vf(vf2getx_vf_vf2(s), vf2getx_vf_vf2(t)));
+
+  x = dfadd_vf2_vf2_vf(t, u);
+  rx = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x));
+
+  rx = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), rx);
+
+  u = vcast_vf_f(-2.71811842367242206819355e-07f);
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(2.47990446951007470488548e-05f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.00138888787478208541870117f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0416666641831398010253906f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.5));
+
+  x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf_vf(vf2getx_vf_vf2(s), u));
+  ry = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x));
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(0));
+  r = vf2setxy_vf2_vf_vf(vsel_vf_vo_vf_vf(o, rx, ry), vsel_vf_vo_vf_vf(o, ry, rx));
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2));
+  r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2getx_vf_vf2(r)))));
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(2)), vcast_vi2_i(2));
+  r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2gety_vf_vf2(r)))));
+
+  return r;
+#endif // #if !defined(DETERMINISTIC)
+}
+
+#if !defined(DETERMINISTIC)
+TYPE2_FUNCATR VECTOR_CC vfloat2 XSINCOSPIF_U05(vfloat d) {
+  vopmask o;
+  vfloat u, s, t, rx, ry;
+  vfloat2 r, x, s2;
+
+  u = vmul_vf_vf_vf(d, vcast_vf_f(4));
+  vint2 q = vtruncate_vi2_vf(u);
+  q = vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vxor_vi2_vi2_vi2(vsrl_vi2_vi2_i(q, 31), vcast_vi2_i(1))), vcast_vi2_i(~1));
+  s = vsub_vf_vf_vf(u, vcast_vf_vi2(q));
+
+  t = s;
+  s = vmul_vf_vf_vf(s, s);
+  s2 = dfmul_vf2_vf_vf(t, t);
+
+  //
+
+  u = vcast_vf_f(+0.3093842054e-6);
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.3657307388e-4));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2490393585e-2));
+  x = dfadd2_vf2_vf_vf2(vmul_vf_vf_vf(u, s), vcast_vf2_f_f(-0.080745510756969451904, -1.3373665339076936258e-09));
+  x = dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf2(s2, x), vcast_vf2_f_f(0.78539818525314331055, -2.1857338617566484855e-08));
+
+  x = dfmul_vf2_vf2_vf(x, t);
+  rx = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x));
+
+  rx = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), rx);
+
+  //
+
+  u = vcast_vf_f(-0.2430611801e-7);
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.3590577080e-5));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.3259917721e-3));
+  x = dfadd2_vf2_vf_vf2(vmul_vf_vf_vf(u, s), vcast_vf2_f_f(0.015854343771934509277, 4.4940051354032242811e-10));
+  x = dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf2(s2, x), vcast_vf2_f_f(-0.30842512845993041992, -9.0728339030733922277e-09));
+
+  x = dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf2(x, s2), vcast_vf_f(1));
+  ry = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x));
+
+  //
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0));
+  r = vf2setxy_vf2_vf_vf(vsel_vf_vo_vf_vf(o, rx, ry), vsel_vf_vo_vf_vf(o, ry, rx));
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(4)), vcast_vi2_i(4));
+  r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2getx_vf_vf2(r)))));
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(4)), vcast_vi2_i(4));
+  r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2gety_vf_vf2(r)))));
+
+  o = vgt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1e+7f));
+  r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vandnot_vm_vo32_vm(o, vreinterpret_vm_vf(vf2getx_vf_vf2(r)))));
+  r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vandnot_vm_vo32_vm(o, vreinterpret_vm_vf(vf2gety_vf_vf2(r)))));
+
+  o = visinf_vo_vf(d);
+  r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2getx_vf_vf2(r)))));
+  r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2gety_vf_vf2(r)))));
+
+  return r;
+}
+
+TYPE2_FUNCATR VECTOR_CC vfloat2 XSINCOSPIF_U35(vfloat d) {
+  vopmask o;
+  vfloat u, s, t, rx, ry;
+  vfloat2 r;
+
+  u = vmul_vf_vf_vf(d, vcast_vf_f(4));
+  vint2 q = vtruncate_vi2_vf(u);
+  q = vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vxor_vi2_vi2_vi2(vsrl_vi2_vi2_i(q, 31), vcast_vi2_i(1))), vcast_vi2_i(~1));
+  s = vsub_vf_vf_vf(u, vcast_vf_vi2(q));
+
+  t = s;
+  s = vmul_vf_vf_vf(s, s);
+
+  //
+
+  u = vcast_vf_f(-0.3600925265e-4);
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2490088111e-2));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.8074551076e-1));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.7853981853e+0));
+
+  rx = vmul_vf_vf_vf(u, t);
+
+  //
+
+  u = vcast_vf_f(+0.3539815225e-5);
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.3259574005e-3));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.1585431583e-1));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.3084251285e+0));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(1));
+
+  ry = u;
+
+  //
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0));
+  r = vf2setxy_vf2_vf_vf(vsel_vf_vo_vf_vf(o, rx, ry), vsel_vf_vo_vf_vf(o, ry, rx));
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(4)), vcast_vi2_i(4));
+  r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2getx_vf_vf2(r)))));
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(4)), vcast_vi2_i(4));
+  r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2gety_vf_vf2(r)))));
+
+  o = vgt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1e+7f));
+  r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vandnot_vm_vo32_vm(o, vreinterpret_vm_vf(vf2getx_vf_vf2(r)))));
+  r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vandnot_vm_vo32_vm(o, vreinterpret_vm_vf(vf2gety_vf_vf2(r)))));
+
+  o = visinf_vo_vf(d);
+  r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2getx_vf_vf2(r)))));
+  r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2gety_vf_vf2(r)))));
+
+  return r;
+}
+
+TYPE6_FUNCATR VECTOR_CC vfloat2 XMODFF(vfloat x) {
+  vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x)));
+  fr = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(INT64_C(1) << 23)), vcast_vf_f(0), fr);
+
+  vfloat2 ret;
+
+  ret = vf2setxy_vf2_vf_vf(vcopysign_vf_vf_vf(fr, x), vcopysign_vf_vf_vf(vsub_vf_vf_vf(x, fr), x));
+
+  return ret;
+}
+
+#ifdef ENABLE_GNUABI
+EXPORT VECTOR_CC void xsincosf(vfloat a, float *ps, float *pc) {
+  vfloat2 r = sincosfk(a);
+  vstoreu_v_p_vf(ps, vf2getx_vf_vf2(r));
+  vstoreu_v_p_vf(pc, vf2gety_vf_vf2(r));
+}
+
+EXPORT VECTOR_CC void xsincosf_u1(vfloat a, float *ps, float *pc) {
+  vfloat2 r = sincosfk_u1(a);
+  vstoreu_v_p_vf(ps, vf2getx_vf_vf2(r));
+  vstoreu_v_p_vf(pc, vf2gety_vf_vf2(r));
+}
+
+EXPORT VECTOR_CC void xsincospif_u05(vfloat a, float *ps, float *pc) {
+  vfloat2 r = sincospifk_u05(a);
+  vstoreu_v_p_vf(ps, vf2getx_vf_vf2(r));
+  vstoreu_v_p_vf(pc, vf2gety_vf_vf2(r));
+}
+
+EXPORT VECTOR_CC void xsincospif_u35(vfloat a, float *ps, float *pc) {
+  vfloat2 r = sincospifk_u35(a);
+  vstoreu_v_p_vf(ps, vf2getx_vf_vf2(r));
+  vstoreu_v_p_vf(pc, vf2gety_vf_vf2(r));
+}
+
+EXPORT CONST VECTOR_CC vfloat xmodff(vfloat a, float *iptr) {
+  vfloat2 r = modffk(a);
+  vstoreu_v_p_vf(iptr, vf2gety_vf_vf2(r));
+  return vf2getx_vf_vf2(r);
+}
+#endif // #ifdef ENABLE_GNUABI
+#endif // #if !defined(DETERMINISTIC)
+
+EXPORT CONST VECTOR_CC vfloat xtanf_u1(vfloat d) {
+#if !defined(DETERMINISTIC)
+  vint2 q;
+  vfloat u, v;
+  vfloat2 s, t, x;
+  vopmask o;
+
+  if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) {
+    u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(2 * M_1_PI)));
+    q = vrint_vi2_vf(u);
+    v = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d);
+    s = dfadd2_vf2_vf_vf(v, vmul_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f)));
+    s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f)));
+  } else {
+    dfi_t dfi = rempif(d);
+    q = dfigeti_vi2_dfi(dfi);
+    s = dfigetdf_vf2_dfi(dfi);
+    o = vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d));
+    s = vf2setx_vf2_vf2_vf(s, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2getx_vf_vf2(s)))));
+    s = vf2sety_vf2_vf2_vf(s, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2gety_vf_vf2(s)))));
+  }
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1));
+  vmask n = vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0)));
+#if !defined(_MSC_VER)
+  s = vf2setx_vf2_vf2_vf(s, vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vf2getx_vf_vf2(s)), n)));
+  s = vf2sety_vf2_vf2_vf(s, vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vf2gety_vf_vf2(s)), n)));
+#else
+  s.x = vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(s.x), n));
+  s.y = vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(s.y), n));
+#endif
+
+  t = s;
+  s = dfsqu_vf2_vf2(s);
+  s = dfnormalize_vf2_vf2(s);
+
+  u = vcast_vf_f(0.00446636462584137916564941f);
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-8.3920182078145444393158e-05f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0109639242291450500488281f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0212360303848981857299805f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0540687143802642822265625f));
+
+  x = dfadd_vf2_vf_vf(vcast_vf_f(0.133325666189193725585938f), vmul_vf_vf_vf(u, vf2getx_vf_vf2(s)));
+  x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf2(vcast_vf_f(0.33333361148834228515625f), dfmul_vf2_vf2_vf2(s, x)), s));
+  x = dfmul_vf2_vf2_vf2(t, x);
+
+  x = vsel_vf2_vo_vf2_vf2(o, dfrec_vf2_vf2(x), x);
+
+  u = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x));
+
+  u = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), d, u);
+
+  return u;
+
+#else // #if !defined(DETERMINISTIC)
+
+  vint2 q;
+  vfloat u, v;
+  vfloat2 s, t, x;
+  vopmask o;
+
+  u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(2 * M_1_PI)));
+  q = vrint_vi2_vf(u);
+  v = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d);
+  s = dfadd2_vf2_vf_vf(v, vmul_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f)));
+  s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f)));
+  vopmask g = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f));
+
+  if (!LIKELY(vtestallones_i_vo32(g))) {
+    dfi_t dfi = rempif(d);
+    t = dfigetdf_vf2_dfi(dfi);
+    o = vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d));
+    t = vf2setx_vf2_vf2_vf(t, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2getx_vf_vf2(t)))));
+    t = vf2sety_vf2_vf2_vf(t, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2gety_vf_vf2(t)))));
+    q = vsel_vi2_vo_vi2_vi2(g, q, dfigeti_vi2_dfi(dfi));
+    s = vsel_vf2_vo_vf2_vf2(g, s, t);
+  }
+
+  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1));
+  vmask n = vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0)));
+  s = vf2setx_vf2_vf2_vf(s, vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vf2getx_vf_vf2(s)), n)));
+  s = vf2sety_vf2_vf2_vf(s, vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vf2gety_vf_vf2(s)), n)));
+
+  t = s;
+  s = dfsqu_vf2_vf2(s);
+  s = dfnormalize_vf2_vf2(s);
+
+  u = vcast_vf_f(0.00446636462584137916564941f);
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-8.3920182078145444393158e-05f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0109639242291450500488281f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0212360303848981857299805f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0540687143802642822265625f));
+
+  x = dfadd_vf2_vf_vf(vcast_vf_f(0.133325666189193725585938f), vmul_vf_vf_vf(u, vf2getx_vf_vf2(s)));
+  x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf2(vcast_vf_f(0.33333361148834228515625f), dfmul_vf2_vf2_vf2(s, x)), s));
+  x = dfmul_vf2_vf2_vf2(t, x);
+
+  x = vsel_vf2_vo_vf2_vf2(o, dfrec_vf2_vf2(x), x);
+
+  u = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x));
+
+  u = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), d, u);
+
+  return u;
+#endif // #if !defined(DETERMINISTIC)
+}
+
+#if !defined(DETERMINISTIC)
+EXPORT CONST VECTOR_CC vfloat xatanf(vfloat d) {
+  vfloat s, t, u;
+  vint2 q;
+
+  q = vsel_vi2_vf_vi2(d, vcast_vi2_i(2));
+  s = vabs_vf_vf(d);
+
+  q = vsel_vi2_vf_vf_vi2_vi2(vcast_vf_f(1.0f), s, vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), q);
+  s = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(vcast_vf_f(1.0f), s), vrec_vf_vf(s), s);
+
+  t = vmul_vf_vf_vf(s, s);
+
+  vfloat t2 = vmul_vf_vf_vf(t, t), t4 = vmul_vf_vf_vf(t2, t2);
+  u = POLY8(t, t2, t4,
+            0.00282363896258175373077393f,
+            -0.0159569028764963150024414f,
+            0.0425049886107444763183594f,
+            -0.0748900920152664184570312f,
+            0.106347933411598205566406f,
+            -0.142027363181114196777344f,
+            0.199926957488059997558594f,
+            -0.333331018686294555664062f);
+
+  t = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(t, u), s);
+
+  t = vsel_vf_vo_vf_vf(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vsub_vf_vf_vf(vcast_vf_f((float)(M_PI/2)), t), t);
+
+  t = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(t)));
+
+#if defined(ENABLE_NEON32) || defined(ENABLE_NEON32VFPV4)
+  t = vsel_vf_vo_vf_vf(visinf_vo_vf(d), vmulsign_vf_vf_vf(vcast_vf_f(1.5874010519681994747517056f), d), t);
+#endif
+
+  return t;
+}
+#endif // #if !defined(DETERMINISTIC)
+
+static INLINE CONST VECTOR_CC vfloat atan2kf(vfloat y, vfloat x) {
+  vfloat s, t, u;
+  vint2 q;
+  vopmask p;
+
+  q = vsel_vi2_vf_vi2(x, vcast_vi2_i(-2));
+  x = vabs_vf_vf(x);
+
+  q = vsel_vi2_vf_vf_vi2_vi2(x, y, vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), q);
+  p = vlt_vo_vf_vf(x, y);
+  s = vsel_vf_vo_vf_vf(p, vneg_vf_vf(x), y);
+  t = vmax_vf_vf_vf(x, y);
+
+  s = vdiv_vf_vf_vf(s, t);
+  t = vmul_vf_vf_vf(s, s);
+
+  vfloat t2 = vmul_vf_vf_vf(t, t), t4 = vmul_vf_vf_vf(t2, t2);
+  u = POLY8(t, t2, t4,
+            0.00282363896258175373077393f,
+            -0.0159569028764963150024414f,
+            0.0425049886107444763183594f,
+            -0.0748900920152664184570312f,
+            0.106347933411598205566406f,
+            -0.142027363181114196777344f,
+            0.199926957488059997558594f,
+            -0.333331018686294555664062f);
+
+  t = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(t, u), s);
+  t = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f((float)(M_PI/2)), t);
+
+  return t;
+}
+
+static INLINE CONST VECTOR_CC vfloat visinf2_vf_vf_vf(vfloat d, vfloat m) {
+  return vreinterpret_vf_vm(vand_vm_vo32_vm(visinf_vo_vf(d), vor_vm_vm_vm(vsignbit_vm_vf(d), vreinterpret_vm_vf(m))));
+}
+
+#if !defined(DETERMINISTIC)
+EXPORT CONST VECTOR_CC vfloat xatan2f(vfloat y, vfloat x) {
+  vfloat r = atan2kf(vabs_vf_vf(y), x);
+
+  r = vmulsign_vf_vf_vf(r, x);
+  r = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), veq_vo_vf_vf(x, vcast_vf_f(0.0f))), vsub_vf_vf_vf(vcast_vf_f((float)(M_PI/2)), visinf2_vf_vf_vf(x, vmulsign_vf_vf_vf(vcast_vf_f((float)(M_PI/2)), x))), r);
+  r = vsel_vf_vo_vf_vf(visinf_vo_vf(y), vsub_vf_vf_vf(vcast_vf_f((float)(M_PI/2)), visinf2_vf_vf_vf(x, vmulsign_vf_vf_vf(vcast_vf_f((float)(M_PI/4)), x))), r);
+
+  r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(y, vcast_vf_f(0.0f)), vreinterpret_vf_vm(vand_vm_vo32_vm(vsignbit_vo_vf(x), vreinterpret_vm_vf(vcast_vf_f((float)M_PI)))), r);
+
+  r = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vreinterpret_vm_vf(vmulsign_vf_vf_vf(r, y))));
+  return r;
+}
+
+EXPORT CONST VECTOR_CC vfloat xasinf(vfloat d) {
+  vopmask o = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(0.5f));
+  vfloat x2 = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, d), vmul_vf_vf_vf(vsub_vf_vf_vf(vcast_vf_f(1), vabs_vf_vf(d)), vcast_vf_f(0.5f)));
+  vfloat x = vsel_vf_vo_vf_vf(o, vabs_vf_vf(d), vsqrt_vf_vf(x2)), u;
+
+  u = vcast_vf_f(+0.4197454825e-1);
+  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.2424046025e-1));
+  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.4547423869e-1));
+  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.7495029271e-1));
+  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.1666677296e+0));
+  u = vmla_vf_vf_vf_vf(u, vmul_vf_vf_vf(x, x2), x);
+
+  vfloat r = vsel_vf_vo_vf_vf(o, u, vmla_vf_vf_vf_vf(u, vcast_vf_f(-2), vcast_vf_f(M_PIf/2)));
+  return vmulsign_vf_vf_vf(r, d);
+}
+
+EXPORT CONST VECTOR_CC vfloat xacosf(vfloat d) {
+  vopmask o = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(0.5f));
+  vfloat x2 = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, d),
+                               vmul_vf_vf_vf(vsub_vf_vf_vf(vcast_vf_f(1), vabs_vf_vf(d)), vcast_vf_f(0.5f))), u;
+  vfloat x = vsel_vf_vo_vf_vf(o, vabs_vf_vf(d), vsqrt_vf_vf(x2));
+  x = vsel_vf_vo_vf_vf(veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1.0f)), vcast_vf_f(0), x);
+
+  u = vcast_vf_f(+0.4197454825e-1);
+  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.2424046025e-1));
+  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.4547423869e-1));
+  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.7495029271e-1));
+  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.1666677296e+0));
+  u = vmul_vf_vf_vf(u, vmul_vf_vf_vf(x2, x));
+
+  vfloat y = vsub_vf_vf_vf(vcast_vf_f(3.1415926535897932f/2), vadd_vf_vf_vf(vmulsign_vf_vf_vf(x, d), vmulsign_vf_vf_vf(u, d)));
+  x = vadd_vf_vf_vf(x, u);
+  vfloat r = vsel_vf_vo_vf_vf(o, y, vmul_vf_vf_vf(x, vcast_vf_f(2)));
+  return vsel_vf_vo_vf_vf(vandnot_vo_vo_vo(o, vlt_vo_vf_vf(d, vcast_vf_f(0))),
+                          vf2getx_vf_vf2(dfadd_vf2_vf2_vf(vcast_vf2_f_f(3.1415927410125732422f,-8.7422776573475857731e-08f),
+                                                          vneg_vf_vf(r))), r);
+}
+#endif // #if !defined(DETERMINISTIC)
+
+//
+
+static INLINE CONST VECTOR_CC vfloat2 atan2kf_u1(vfloat2 y, vfloat2 x) {
+  vfloat u;
+  vfloat2 s, t;
+  vint2 q;
+  vopmask p;
+  vmask r;
+
+  q = vsel_vi2_vf_vf_vi2_vi2(vf2getx_vf_vf2(x), vcast_vf_f(0), vcast_vi2_i(-2), vcast_vi2_i(0));
+  p = vlt_vo_vf_vf(vf2getx_vf_vf2(x), vcast_vf_f(0));
+  r = vand_vm_vo32_vm(p, vreinterpret_vm_vf(vcast_vf_f(-0.0)));
+  x = vf2setx_vf2_vf2_vf(x, vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vf2getx_vf_vf2(x)), r)));
+  x = vf2sety_vf2_vf2_vf(x, vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vf2gety_vf_vf2(x)), r)));
+
+  q = vsel_vi2_vf_vf_vi2_vi2(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y), vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), q);
+  p = vlt_vo_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y));
+  s = vsel_vf2_vo_vf2_vf2(p, dfneg_vf2_vf2(x), y);
+  t = vsel_vf2_vo_vf2_vf2(p, y, x);
+
+  s = dfdiv_vf2_vf2_vf2(s, t);
+  t = dfsqu_vf2_vf2(s);
+  t = dfnormalize_vf2_vf2(t);
+
+  u = vcast_vf_f(-0.00176397908944636583328247f);
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(t), vcast_vf_f(0.0107900900766253471374512f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(t), vcast_vf_f(-0.0309564601629972457885742f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(t), vcast_vf_f(0.0577365085482597351074219f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(t), vcast_vf_f(-0.0838950723409652709960938f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(t), vcast_vf_f(0.109463557600975036621094f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(t), vcast_vf_f(-0.142626821994781494140625f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(t), vcast_vf_f(0.199983194470405578613281f));
+
+  t = dfmul_vf2_vf2_vf2(t, dfadd_vf2_vf_vf(vcast_vf_f(-0.333332866430282592773438f), vmul_vf_vf_vf(u, vf2getx_vf_vf2(t))));
+  t = dfmul_vf2_vf2_vf2(s, dfadd_vf2_vf_vf2(vcast_vf_f(1), t));
+  t = dfadd_vf2_vf2_vf2(dfmul_vf2_vf2_vf(vcast_vf2_f_f(1.5707963705062866211f, -4.3711388286737928865e-08f), vcast_vf_vi2(q)), t);
+
+  return t;
+}
+
+#if !defined(DETERMINISTIC)
+EXPORT CONST VECTOR_CC vfloat xatan2f_u1(vfloat y, vfloat x) {
+  vopmask o = vlt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(2.9387372783541830947e-39f)); // nexttowardf((1.0 / FLT_MAX), 1)
+  x = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(x, vcast_vf_f(1 << 24)), x);
+  y = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(y, vcast_vf_f(1 << 24)), y);
+
+  vfloat2 d = atan2kf_u1(vcast_vf2_vf_vf(vabs_vf_vf(y), vcast_vf_f(0)), vcast_vf2_vf_vf(x, vcast_vf_f(0)));
+  vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d));
+
+  r = vmulsign_vf_vf_vf(r, x);
+  r = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), veq_vo_vf_vf(x, vcast_vf_f(0))), vsub_vf_vf_vf(vcast_vf_f(M_PI/2), visinf2_vf_vf_vf(x, vmulsign_vf_vf_vf(vcast_vf_f(M_PI/2), x))), r);
+  r = vsel_vf_vo_vf_vf(visinf_vo_vf(y), vsub_vf_vf_vf(vcast_vf_f(M_PI/2), visinf2_vf_vf_vf(x, vmulsign_vf_vf_vf(vcast_vf_f(M_PI/4), x))), r);
+  r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(y, vcast_vf_f(0.0f)), vreinterpret_vf_vm(vand_vm_vo32_vm(vsignbit_vo_vf(x), vreinterpret_vm_vf(vcast_vf_f((float)M_PI)))), r);
+
+  r = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vreinterpret_vm_vf(vmulsign_vf_vf_vf(r, y))));
+  return r;
+}
+
+EXPORT CONST VECTOR_CC vfloat xasinf_u1(vfloat d) {
+  vopmask o = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(0.5f));
+  vfloat x2 = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, d), vmul_vf_vf_vf(vsub_vf_vf_vf(vcast_vf_f(1), vabs_vf_vf(d)), vcast_vf_f(0.5f))), u;
+  vfloat2 x = vsel_vf2_vo_vf2_vf2(o, vcast_vf2_vf_vf(vabs_vf_vf(d), vcast_vf_f(0)), dfsqrt_vf2_vf(x2));
+  x = vsel_vf2_vo_vf2_vf2(veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1.0f)), vcast_vf2_f_f(0, 0), x);
+
+  u = vcast_vf_f(+0.4197454825e-1);
+  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.2424046025e-1));
+  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.4547423869e-1));
+  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.7495029271e-1));
+  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.1666677296e+0));
+  u = vmul_vf_vf_vf(u, vmul_vf_vf_vf(x2, vf2getx_vf_vf2(x)));
+
+  vfloat2 y = dfsub_vf2_vf2_vf(dfsub_vf2_vf2_vf2(vcast_vf2_f_f(3.1415927410125732422f/4,-8.7422776573475857731e-08f/4), x), u);
+
+  vfloat r = vsel_vf_vo_vf_vf(o, vadd_vf_vf_vf(u, vf2getx_vf_vf2(x)),
+                              vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(y), vf2gety_vf_vf2(y)), vcast_vf_f(2)));
+  return vmulsign_vf_vf_vf(r, d);
+}
+
+EXPORT CONST VECTOR_CC vfloat xacosf_u1(vfloat d) {
+  vopmask o = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(0.5f));
+  vfloat x2 = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, d), vmul_vf_vf_vf(vsub_vf_vf_vf(vcast_vf_f(1), vabs_vf_vf(d)), vcast_vf_f(0.5f))), u;
+  vfloat2 x = vsel_vf2_vo_vf2_vf2(o, vcast_vf2_vf_vf(vabs_vf_vf(d), vcast_vf_f(0)), dfsqrt_vf2_vf(x2));
+  x = vsel_vf2_vo_vf2_vf2(veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1.0f)), vcast_vf2_f_f(0, 0), x);
+
+  u = vcast_vf_f(+0.4197454825e-1);
+  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.2424046025e-1));
+  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.4547423869e-1));
+  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.7495029271e-1));
+  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.1666677296e+0));
+  u = vmul_vf_vf_vf(u, vmul_vf_vf_vf(x2, vf2getx_vf_vf2(x)));
+
+  vfloat2 y = dfsub_vf2_vf2_vf2(vcast_vf2_f_f(3.1415927410125732422f/2, -8.7422776573475857731e-08f/2),
+                                dfadd_vf2_vf_vf(vmulsign_vf_vf_vf(vf2getx_vf_vf2(x), d), vmulsign_vf_vf_vf(u, d)));
+  x = dfadd_vf2_vf2_vf(x, u);
+
+  y = vsel_vf2_vo_vf2_vf2(o, y, dfscale_vf2_vf2_vf(x, vcast_vf_f(2)));
+
+  y = vsel_vf2_vo_vf2_vf2(vandnot_vo_vo_vo(o, vlt_vo_vf_vf(d, vcast_vf_f(0))),
+                          dfsub_vf2_vf2_vf2(vcast_vf2_f_f(3.1415927410125732422f, -8.7422776573475857731e-08f), y), y);
+
+  return vadd_vf_vf_vf(vf2getx_vf_vf2(y), vf2gety_vf_vf2(y));
+}
+
+EXPORT CONST VECTOR_CC vfloat xatanf_u1(vfloat d) {
+  vfloat2 d2 = atan2kf_u1(vcast_vf2_vf_vf(vabs_vf_vf(d), vcast_vf_f(0)), vcast_vf2_f_f(1, 0));
+  vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(d2), vf2gety_vf_vf2(d2));
+  r = vsel_vf_vo_vf_vf(visinf_vo_vf(d), vcast_vf_f(1.570796326794896557998982), r);
+  return vmulsign_vf_vf_vf(r, d);
+}
+#endif // #if !defined(DETERMINISTIC)
+
+//
+
+#if !defined(DETERMINISTIC)
+EXPORT CONST VECTOR_CC vfloat xlogf(vfloat d) {
+  vfloat x, x2, t, m;
+
+#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
+  vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN));
+  d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32))), d);
+  vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f)));
+  m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e));
+  e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e);
+#else
+  vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f)));
+  e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e);
+  m = vgetmant_vf_vf(d);
+#endif
+
+  x = vdiv_vf_vf_vf(vsub_vf_vf_vf(m, vcast_vf_f(1.0f)), vadd_vf_vf_vf(vcast_vf_f(1.0f), m));
+  x2 = vmul_vf_vf_vf(x, x);
+
+  t = vcast_vf_f(0.2392828464508056640625f);
+  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(0.28518211841583251953125f));
+  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(0.400005877017974853515625f));
+  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(0.666666686534881591796875f));
+  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(2.0f));
+
+#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
+  x = vmla_vf_vf_vf_vf(x, t, vmul_vf_vf_vf(vcast_vf_f(0.693147180559945286226764f), vcast_vf_vi2(e)));
+  x = vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITYf), x);
+  x = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(d, vcast_vf_f(0)), visnan_vo_vf(d)), vcast_vf_f(SLEEF_NANf), x);
+  x = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(-SLEEF_INFINITYf), x);
+#else
+  x = vmla_vf_vf_vf_vf(x, t, vmul_vf_vf_vf(vcast_vf_f(0.693147180559945286226764f), e));
+  x = vfixup_vf_vf_vf_vi2_i(x, d, vcast_vi2_i((5 << (5*4))), 0);
+#endif
+
+  return x;
+}
+#endif // #if !defined(DETERMINISTIC)
+
+#if !defined(DETERMINISTIC)
+EXPORT CONST VECTOR_CC vfloat xexpf(vfloat d) {
+  vint2 q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(R_LN2f)));
+  vfloat s, u;
+
+  s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf), d);
+  s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf), s);
+
+  u = vcast_vf_f(0.000198527617612853646278381);
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00139304355252534151077271));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833336077630519866943359));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0416664853692054748535156));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.166666671633720397949219));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.5));
+
+  u = vadd_vf_vf_vf(vcast_vf_f(1.0f), vmla_vf_vf_vf_vf(vmul_vf_vf_vf(s, s), u, s));
+
+  u = vldexp2_vf_vf_vi2(u, q);
+
+  u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d, vcast_vf_f(-104)), vreinterpret_vm_vf(u)));
+  u = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(vcast_vf_f(100), d), vcast_vf_f(SLEEF_INFINITYf), u);
+
+  return u;
+}
+#endif // #if !defined(DETERMINISTIC)
+
+static INLINE CONST VECTOR_CC vfloat expm1fk(vfloat d) {
+  vint2 q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(R_LN2f)));
+  vfloat s, u;
+
+  s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf), d);
+  s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf), s);
+
+  vfloat s2 = vmul_vf_vf_vf(s, s), s4 = vmul_vf_vf_vf(s2, s2);
+  u = POLY6(s, s2, s4,
+            0.000198527617612853646278381,
+            0.00139304355252534151077271,
+            0.00833336077630519866943359,
+            0.0416664853692054748535156,
+            0.166666671633720397949219,
+            0.5);
+
+  u = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(s, s), u, s);
+
+  u = vsel_vf_vo_vf_vf(veq_vo_vi2_vi2(q, vcast_vi2_i(0)), u,
+                       vsub_vf_vf_vf(vldexp2_vf_vf_vi2(vadd_vf_vf_vf(u, vcast_vf_f(1)), q), vcast_vf_f(1)));
+
+  return u;
+}
+
+#if defined(ENABLE_NEON32) || defined(ENABLE_NEON32VFPV4)
+EXPORT CONST VECTOR_CC vfloat xsqrtf_u35(vfloat d) {
+  vfloat e = vreinterpret_vf_vi2(vadd_vi2_vi2_vi2(vcast_vi2_i(0x20000000), vand_vi2_vi2_vi2(vcast_vi2_i(0x7f000000), vsrl_vi2_vi2_i(vreinterpret_vi2_vf(d), 1))));
+  vfloat m = vreinterpret_vf_vi2(vadd_vi2_vi2_vi2(vcast_vi2_i(0x3f000000), vand_vi2_vi2_vi2(vcast_vi2_i(0x01ffffff), vreinterpret_vi2_vf(d))));
+  float32x4_t x = vrsqrteq_f32(m);
+  x = vmulq_f32(x, vrsqrtsq_f32(m, vmulq_f32(x, x)));
+  float32x4_t u = vmulq_f32(x, m);
+  u = vmlaq_f32(u, vmlsq_f32(m, u, u), vmulq_f32(x, vdupq_n_f32(0.5)));
+  e = vreinterpret_vf_vm(vandnot_vm_vo32_vm(veq_vo_vf_vf(d, vcast_vf_f(0)), vreinterpret_vm_vf(e)));
+  u = vmul_vf_vf_vf(e, u);
+
+  u = vsel_vf_vo_vf_vf(visinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITYf), u);
+  u = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visnan_vo_vf(d), vlt_vo_vf_vf(d, vcast_vf_f(0))), vreinterpret_vm_vf(u)));
+  u = vmulsign_vf_vf_vf(u, d);
+
+  return u;
+}
+#elif defined(ENABLE_VECEXT)
+EXPORT CONST VECTOR_CC vfloat xsqrtf_u35(vfloat d) {
+  vfloat q = vsqrt_vf_vf(d);
+  q = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0), q);
+  return vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITYf), q);
+}
+#else
+EXPORT CONST VECTOR_CC vfloat xsqrtf_u35(vfloat d) { return vsqrt_vf_vf(d); }
+#endif
+
+#if !defined(DETERMINISTIC)
+EXPORT CONST VECTOR_CC vfloat xcbrtf(vfloat d) {
+  vfloat x, y, q = vcast_vf_f(1.0), t;
+  vint2 e, qu, re;
+
+#if defined(ENABLE_AVX512F) || defined(ENABLE_AVX512FNOFMA)
+  vfloat s = d;
+#endif
+  e = vadd_vi2_vi2_vi2(vilogbk_vi2_vf(vabs_vf_vf(d)), vcast_vi2_i(1));
+  d = vldexp2_vf_vf_vi2(d, vneg_vi2_vi2(e));
+
+  t = vadd_vf_vf_vf(vcast_vf_vi2(e), vcast_vf_f(6144));
+  qu = vtruncate_vi2_vf(vmul_vf_vf_vf(t, vcast_vf_f(1.0f/3.0f)));
+  re = vtruncate_vi2_vf(vsub_vf_vf_vf(t, vmul_vf_vf_vf(vcast_vf_vi2(qu), vcast_vf_f(3))));
+
+  q = vsel_vf_vo_vf_vf(veq_vo_vi2_vi2(re, vcast_vi2_i(1)), vcast_vf_f(1.2599210498948731647672106f), q);
+  q = vsel_vf_vo_vf_vf(veq_vo_vi2_vi2(re, vcast_vi2_i(2)), vcast_vf_f(1.5874010519681994747517056f), q);
+  q = vldexp2_vf_vf_vi2(q, vsub_vi2_vi2_vi2(qu, vcast_vi2_i(2048)));
+
+  q = vmulsign_vf_vf_vf(q, d);
+  d = vabs_vf_vf(d);
+
+  x = vcast_vf_f(-0.601564466953277587890625f);
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(2.8208892345428466796875f));
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(-5.532182216644287109375f));
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(5.898262500762939453125f));
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(-3.8095417022705078125f));
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(2.2241256237030029296875f));
+
+  y = vmul_vf_vf_vf(vmul_vf_vf_vf(d, x), x);
+  y = vmul_vf_vf_vf(vsub_vf_vf_vf(y, vmul_vf_vf_vf(vmul_vf_vf_vf(vcast_vf_f(2.0f / 3.0f), y), vmla_vf_vf_vf_vf(y, x, vcast_vf_f(-1.0f)))), q);
+
+#if defined(ENABLE_AVX512F) || defined(ENABLE_AVX512FNOFMA)
+  y = vsel_vf_vo_vf_vf(visinf_vo_vf(s), vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), s), y);
+  y = vsel_vf_vo_vf_vf(veq_vo_vf_vf(s, vcast_vf_f(0)), vmulsign_vf_vf_vf(vcast_vf_f(0), s), y);
+#endif
+
+  return y;
+}
+#endif // #if !defined(DETERMINISTIC)
+
+#if !defined(DETERMINISTIC)
+EXPORT CONST VECTOR_CC vfloat xcbrtf_u1(vfloat d) {
+  vfloat x, y, z, t;
+  vfloat2 q2 = vcast_vf2_f_f(1, 0), u, v;
+  vint2 e, qu, re;
+
+#if defined(ENABLE_AVX512F) || defined(ENABLE_AVX512FNOFMA)
+  vfloat s = d;
+#endif
+  e = vadd_vi2_vi2_vi2(vilogbk_vi2_vf(vabs_vf_vf(d)), vcast_vi2_i(1));
+  d = vldexp2_vf_vf_vi2(d, vneg_vi2_vi2(e));
+
+  t = vadd_vf_vf_vf(vcast_vf_vi2(e), vcast_vf_f(6144));
+  qu = vtruncate_vi2_vf(vmul_vf_vf_vf(t, vcast_vf_f(1.0/3.0)));
+  re = vtruncate_vi2_vf(vsub_vf_vf_vf(t, vmul_vf_vf_vf(vcast_vf_vi2(qu), vcast_vf_f(3))));
+
+  q2 = vsel_vf2_vo_vf2_vf2(veq_vo_vi2_vi2(re, vcast_vi2_i(1)), vcast_vf2_f_f(1.2599210739135742188f, -2.4018701694217270415e-08), q2);
+  q2 = vsel_vf2_vo_vf2_vf2(veq_vo_vi2_vi2(re, vcast_vi2_i(2)), vcast_vf2_f_f(1.5874010324478149414f, 1.9520385308169352356e-08), q2);
+
+  q2 = vf2setx_vf2_vf2_vf(q2, vmulsign_vf_vf_vf(vf2getx_vf_vf2(q2), d));
+  q2 = vf2sety_vf2_vf2_vf(q2, vmulsign_vf_vf_vf(vf2gety_vf_vf2(q2), d));
+  d = vabs_vf_vf(d);
+
+  x = vcast_vf_f(-0.601564466953277587890625f);
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(2.8208892345428466796875f));
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(-5.532182216644287109375f));
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(5.898262500762939453125f));
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(-3.8095417022705078125f));
+  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(2.2241256237030029296875f));
+
+  y = vmul_vf_vf_vf(x, x); y = vmul_vf_vf_vf(y, y); x = vsub_vf_vf_vf(x, vmul_vf_vf_vf(vmlanp_vf_vf_vf_vf(d, y, x), vcast_vf_f(-1.0 / 3.0)));
+
+  z = x;
+
+  u = dfmul_vf2_vf_vf(x, x);
+  u = dfmul_vf2_vf2_vf2(u, u);
+  u = dfmul_vf2_vf2_vf(u, d);
+  u = dfadd2_vf2_vf2_vf(u, vneg_vf_vf(x));
+  y = vadd_vf_vf_vf(vf2getx_vf_vf2(u), vf2gety_vf_vf2(u));
+
+  y = vmul_vf_vf_vf(vmul_vf_vf_vf(vcast_vf_f(-2.0 / 3.0), y), z);
+  v = dfadd2_vf2_vf2_vf(dfmul_vf2_vf_vf(z, z), y);
+  v = dfmul_vf2_vf2_vf(v, d);
+  v = dfmul_vf2_vf2_vf2(v, q2);
+  z = vldexp2_vf_vf_vi2(vadd_vf_vf_vf(vf2getx_vf_vf2(v), vf2gety_vf_vf2(v)), vsub_vi2_vi2_vi2(qu, vcast_vi2_i(2048)));
+
+  z = vsel_vf_vo_vf_vf(visinf_vo_vf(d), vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), vf2getx_vf_vf2(q2)), z);
+  z = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vreinterpret_vf_vm(vsignbit_vm_vf(vf2getx_vf_vf2(q2))), z);
+
+#if defined(ENABLE_AVX512F) || defined(ENABLE_AVX512FNOFMA)
+  z = vsel_vf_vo_vf_vf(visinf_vo_vf(s), vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), s), z);
+  z = vsel_vf_vo_vf_vf(veq_vo_vf_vf(s, vcast_vf_f(0)), vmulsign_vf_vf_vf(vcast_vf_f(0), s), z);
+#endif
+
+  return z;
+}
+#endif // #if !defined(DETERMINISTIC)
+
+static INLINE CONST VECTOR_CC vfloat2 logkf(vfloat d) {
+  vfloat2 x, x2;
+  vfloat t, m;
+
+#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
+  vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN));
+  d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32))), d);
+  vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f)));
+  m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e));
+  e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e);
+#else
+  vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f)));
+  e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e);
+  m = vgetmant_vf_vf(d);
+#endif
+
+  x = dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf_vf(vcast_vf_f(-1), m), dfadd2_vf2_vf_vf(vcast_vf_f(1), m));
+  x2 = dfsqu_vf2_vf2(x);
+
+  t = vcast_vf_f(0.240320354700088500976562);
+  t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(x2), vcast_vf_f(0.285112679004669189453125));
+  t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(x2), vcast_vf_f(0.400007992982864379882812));
+  vfloat2 c = vcast_vf2_f_f(0.66666662693023681640625f, 3.69183861259614332084311e-09f);
+
+#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
+  vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), vcast_vf_vi2(e));
+#else
+  vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), e);
+#endif
+
+  s = dfadd_vf2_vf2_vf2(s, dfscale_vf2_vf2_vf(x, vcast_vf_f(2)));
+  s = dfadd_vf2_vf2_vf2(s, dfmul_vf2_vf2_vf2(dfmul_vf2_vf2_vf2(x2, x),
+                                             dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf(x2, t), c)));
+  return s;
+}
+
+static INLINE CONST VECTOR_CC vfloat logk3f(vfloat d) {
+  vfloat x, x2, t, m;
+
+#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
+  vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN));
+  d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32))), d);
+  vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f)));
+  m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e));
+  e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e);
+#else
+  vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f)));
+  e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e);
+  m = vgetmant_vf_vf(d);
+#endif
+
+  x = vdiv_vf_vf_vf(vsub_vf_vf_vf(m, vcast_vf_f(1.0f)), vadd_vf_vf_vf(vcast_vf_f(1.0f), m));
+  x2 = vmul_vf_vf_vf(x, x);
+
+  t = vcast_vf_f(0.2392828464508056640625f);
+  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(0.28518211841583251953125f));
+  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(0.400005877017974853515625f));
+  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(0.666666686534881591796875f));
+  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(2.0f));
+
+#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
+  x = vmla_vf_vf_vf_vf(x, t, vmul_vf_vf_vf(vcast_vf_f(0.693147180559945286226764f), vcast_vf_vi2(e)));
+#else
+  x = vmla_vf_vf_vf_vf(x, t, vmul_vf_vf_vf(vcast_vf_f(0.693147180559945286226764f), e));
+#endif
+
+  return x;
+}
+
+#if !defined(DETERMINISTIC)
+EXPORT CONST VECTOR_CC vfloat xlogf_u1(vfloat d) {
+  vfloat2 x;
+  vfloat t, m, x2;
+
+#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
+  vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN));
+  d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32))), d);
+  vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f)));
+  m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e));
+  e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e);
+  vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), vcast_vf_vi2(e));
+#else
+  vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f)));
+  e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e);
+  m = vgetmant_vf_vf(d);
+  vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), e);
+#endif
+
+  x = dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf_vf(vcast_vf_f(-1), m), dfadd2_vf2_vf_vf(vcast_vf_f(1), m));
+  x2 = vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x));
+
+  t = vcast_vf_f(+0.3027294874e+0f);
+  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.3996108174e+0f));
+  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.6666694880e+0f));
+
+  s = dfadd_vf2_vf2_vf2(s, dfscale_vf2_vf2_vf(x, vcast_vf_f(2)));
+  s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(vmul_vf_vf_vf(x2, vf2getx_vf_vf2(x)), t));
+
+  vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(s), vf2gety_vf_vf2(s));
+
+#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
+  r = vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITYf), r);
+  r = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(d, vcast_vf_f(0)), visnan_vo_vf(d)), vcast_vf_f(SLEEF_NANf), r);
+  r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(-SLEEF_INFINITYf), r);
+#else
+  r = vfixup_vf_vf_vf_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0);
+#endif
+
+  return r;
+}
+#endif // #if !defined(DETERMINISTIC)
+
+static INLINE CONST VECTOR_CC vfloat expkf(vfloat2 d) {
+  vfloat u = vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d)), vcast_vf_f(R_LN2f));
+  vint2 q = vrint_vi2_vf(u);
+  vfloat2 s, t;
+
+  s = dfadd2_vf2_vf2_vf(d, vmul_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf)));
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf)));
+
+  s = dfnormalize_vf2_vf2(s);
+
+  u = vcast_vf_f(0.00136324646882712841033936f);
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.00836596917361021041870117f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0416710823774337768554688f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.166665524244308471679688f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.499999850988388061523438f));
+
+  t = dfadd_vf2_vf2_vf2(s, dfmul_vf2_vf2_vf(dfsqu_vf2_vf2(s), u));
+
+  t = dfadd_vf2_vf_vf2(vcast_vf_f(1), t);
+  u = vadd_vf_vf_vf(vf2getx_vf_vf2(t), vf2gety_vf_vf2(t));
+  u = vldexp_vf_vf_vi2(u, q);
+
+  u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(vf2getx_vf_vf2(d), vcast_vf_f(-104)), vreinterpret_vm_vf(u)));
+
+  return u;
+}
+
+static INLINE CONST VECTOR_CC vfloat expk3f(vfloat d) {
+  vint2 q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(R_LN2f)));
+  vfloat s, u;
+
+  s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf), d);
+  s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf), s);
+
+  u = vcast_vf_f(0.000198527617612853646278381);
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00139304355252534151077271));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833336077630519866943359));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0416664853692054748535156));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.166666671633720397949219));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.5));
+
+  u = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(s, s), u, vadd_vf_vf_vf(s, vcast_vf_f(1.0f)));
+  u = vldexp2_vf_vf_vi2(u, q);
+
+  u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d, vcast_vf_f(-104)), vreinterpret_vm_vf(u)));
+
+  return u;
+}
+
+#if !defined(DETERMINISTIC)
+EXPORT CONST VECTOR_CC vfloat xpowf(vfloat x, vfloat y) {
+#if 1
+  vopmask yisint = vor_vo_vo_vo(veq_vo_vf_vf(vtruncate_vf_vf(y), y), vgt_vo_vf_vf(vabs_vf_vf(y), vcast_vf_f(1 << 24)));
+  vopmask yisodd = vand_vo_vo_vo(vand_vo_vo_vo(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vtruncate_vi2_vf(y), vcast_vi2_i(1)), vcast_vi2_i(1)), yisint),
+                                 vlt_vo_vf_vf(vabs_vf_vf(y), vcast_vf_f(1 << 24)));
+
+#if defined(ENABLE_NEON32) || defined(ENABLE_NEON32VFPV4)
+  yisodd = vandnot_vm_vo32_vm(visinf_vo_vf(y), yisodd);
+#endif
+
+  vfloat result = expkf(dfmul_vf2_vf2_vf(logkf(vabs_vf_vf(x)), y));
+
+  result = vsel_vf_vo_vf_vf(visnan_vo_vf(result), vcast_vf_f(SLEEF_INFINITYf), result);
+
+  result = vmul_vf_vf_vf(result,
+                         vsel_vf_vo_vf_vf(vgt_vo_vf_vf(x, vcast_vf_f(0)),
+                                          vcast_vf_f(1),
+                                          vsel_vf_vo_vf_vf(yisint, vsel_vf_vo_vf_vf(yisodd, vcast_vf_f(-1.0f), vcast_vf_f(1)), vcast_vf_f(SLEEF_NANf))));
+
+  vfloat efx = vmulsign_vf_vf_vf(vsub_vf_vf_vf(vabs_vf_vf(x), vcast_vf_f(1)), y);
+
+  result = vsel_vf_vo_vf_vf(visinf_vo_vf(y),
+                            vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(efx, vcast_vf_f(0.0f)),
+                                                                  vreinterpret_vm_vf(vsel_vf_vo_vf_vf(veq_vo_vf_vf(efx, vcast_vf_f(0.0f)),
+                                                                                                      vcast_vf_f(1.0f),
+                                                                                                      vcast_vf_f(SLEEF_INFINITYf))))),
+                            result);
+
+  result = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), veq_vo_vf_vf(x, vcast_vf_f(0))),
+                            vmul_vf_vf_vf(vsel_vf_vo_vf_vf(yisodd, vsign_vf_vf(x), vcast_vf_f(1)),
+                                          vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(vsel_vf_vo_vf_vf(veq_vo_vf_vf(x, vcast_vf_f(0)), vneg_vf_vf(y), y), vcast_vf_f(0)),
+                                                                                vreinterpret_vm_vf(vcast_vf_f(SLEEF_INFINITYf))))),
+                            result);
+
+  result = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vreinterpret_vm_vf(result)));
+
+  result = vsel_vf_vo_vf_vf(vor_vo_vo_vo(veq_vo_vf_vf(y, vcast_vf_f(0)), veq_vo_vf_vf(x, vcast_vf_f(1))), vcast_vf_f(1), result);
+
+  return result;
+#else
+  return expkf(dfmul_vf2_vf2_vf(logkf(x), y));
+#endif
+}
+
+EXPORT CONST VECTOR_CC vfloat xfastpowf_u3500(vfloat x, vfloat y) {
+  vfloat result = expk3f(vmul_vf_vf_vf(logk3f(vabs_vf_vf(x)), y));
+  vopmask yisint = vor_vo_vo_vo(veq_vo_vf_vf(vtruncate_vf_vf(y), y), vgt_vo_vf_vf(vabs_vf_vf(y), vcast_vf_f(1 << 24)));
+  vopmask yisodd = vand_vo_vo_vo(vand_vo_vo_vo(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vtruncate_vi2_vf(y), vcast_vi2_i(1)), vcast_vi2_i(1)), yisint),
+                                 vlt_vo_vf_vf(vabs_vf_vf(y), vcast_vf_f(1 << 24)));
+
+  result = vsel_vf_vo_vf_vf(vand_vo_vo_vo(vsignbit_vo_vf(x), yisodd), vneg_vf_vf(result), result);
+
+  result = vsel_vf_vo_vf_vf(veq_vo_vf_vf(x, vcast_vf_f(0)), vcast_vf_f(0), result);
+  result = vsel_vf_vo_vf_vf(veq_vo_vf_vf(y, vcast_vf_f(0)), vcast_vf_f(1), result);
+
+  return result;
+}
+#endif // #if !defined(DETERMINISTIC)
+
+static INLINE CONST VECTOR_CC vfloat2 expk2f(vfloat2 d) {
+  vfloat u = vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d)), vcast_vf_f(R_LN2f));
+  vint2 q = vrint_vi2_vf(u);
+  vfloat2 s, t;
+
+  s = dfadd2_vf2_vf2_vf(d, vmul_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf)));
+  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf)));
+
+  u = vcast_vf_f(+0.1980960224e-3f);
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(+0.1394256484e-2f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(+0.8333456703e-2f));
+  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(+0.4166637361e-1f));
+
+  t = dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf(s, u), vcast_vf_f(+0.166666659414234244790680580464e+0f));
+  t = dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf2(s, t), vcast_vf_f(0.5));
+  t = dfadd2_vf2_vf2_vf2(s, dfmul_vf2_vf2_vf2(dfsqu_vf2_vf2(s), t));
+
+  t = dfadd_vf2_vf_vf2(vcast_vf_f(1), t);
+
+  t = vf2setx_vf2_vf2_vf(t, vldexp2_vf_vf_vi2(vf2getx_vf_vf2(t), q));
+  t = vf2sety_vf2_vf2_vf(t, vldexp2_vf_vf_vi2(vf2gety_vf_vf2(t), q));
+
+  t = vf2setx_vf2_vf2_vf(t, vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(vf2getx_vf_vf2(d), vcast_vf_f(-104)), vreinterpret_vm_vf(vf2getx_vf_vf2(t)))));
+  t = vf2sety_vf2_vf2_vf(t, vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(vf2getx_vf_vf2(d), vcast_vf_f(-104)), vreinterpret_vm_vf(vf2gety_vf_vf2(t)))));
+
+  return t;
+}
+
+#if !defined(DETERMINISTIC)
+EXPORT CONST VECTOR_CC vfloat xsinhf(vfloat x) {
+  vfloat y = vabs_vf_vf(x);
+  vfloat2 d = expk2f(vcast_vf2_vf_vf(y, vcast_vf_f(0)));
+  d = dfsub_vf2_vf2_vf2(d, dfrec_vf2_vf2(d));
+  y = vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d)), vcast_vf_f(0.5));
+
+  y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(89)),
+                                    visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y);
+  y = vmulsign_vf_vf_vf(y, x);
+  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y)));
+
+  return y;
+}
+
+EXPORT CONST VECTOR_CC vfloat xcoshf(vfloat x) {
+  vfloat y = vabs_vf_vf(x);
+  vfloat2 d = expk2f(vcast_vf2_vf_vf(y, vcast_vf_f(0)));
+  d = dfadd_vf2_vf2_vf2(d, dfrec_vf2_vf2(d));
+  y = vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d)), vcast_vf_f(0.5));
+
+  y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(89)),
+                                    visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y);
+  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y)));
+
+  return y;
+}
+
+EXPORT CONST VECTOR_CC vfloat xtanhf(vfloat x) {
+  vfloat y = vabs_vf_vf(x);
+  vfloat2 d = expk2f(vcast_vf2_vf_vf(y, vcast_vf_f(0)));
+  vfloat2 e = dfrec_vf2_vf2(d);
+  d = dfdiv_vf2_vf2_vf2(dfadd_vf2_vf2_vf2(d, dfneg_vf2_vf2(e)), dfadd_vf2_vf2_vf2(d, e));
+  y = vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d));
+
+  y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(8.664339742f)),
+                                    visnan_vo_vf(y)), vcast_vf_f(1.0f), y);
+  y = vmulsign_vf_vf_vf(y, x);
+  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y)));
+
+  return y;
+}
+
+EXPORT CONST VECTOR_CC vfloat xsinhf_u35(vfloat x) {
+  vfloat e = expm1fk(vabs_vf_vf(x));
+  vfloat y = vdiv_vf_vf_vf(vadd_vf_vf_vf(e, vcast_vf_f(2)), vadd_vf_vf_vf(e, vcast_vf_f(1)));
+  y = vmul_vf_vf_vf(y, vmul_vf_vf_vf(vcast_vf_f(0.5f), e));
+
+  y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(88)),
+                                    visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y);
+  y = vmulsign_vf_vf_vf(y, x);
+  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y)));
+
+  return y;
+}
+
+EXPORT CONST VECTOR_CC vfloat xcoshf_u35(vfloat x) {
+  vfloat e = xexpf(vabs_vf_vf(x));
+  vfloat y = vmla_vf_vf_vf_vf(vcast_vf_f(0.5f), e, vdiv_vf_vf_vf(vcast_vf_f(0.5), e));
+
+  y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(88)),
+                                    visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y);
+  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y)));
+
+  return y;
+}
+
+EXPORT CONST VECTOR_CC vfloat xtanhf_u35(vfloat x) {
+  vfloat d = expm1fk(vmul_vf_vf_vf(vcast_vf_f(2), vabs_vf_vf(x)));
+  vfloat y = vdiv_vf_vf_vf(d, vadd_vf_vf_vf(vcast_vf_f(2), d));
+
+  y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(8.664339742f)),
+                                    visnan_vo_vf(y)), vcast_vf_f(1.0f), y);
+  y = vmulsign_vf_vf_vf(y, x);
+  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y)));
+
+  return y;
+}
+#endif // #if !defined(DETERMINISTIC)
+
+static INLINE CONST VECTOR_CC vfloat2 logk2f(vfloat2 d) {
+  vfloat2 x, x2, m, s;
+  vfloat t;
+  vint2 e;
+
+#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
+  e = vilogbk_vi2_vf(vmul_vf_vf_vf(vf2getx_vf_vf2(d), vcast_vf_f(1.0f/0.75f)));
+#else
+  e = vrint_vi2_vf(vgetexp_vf_vf(vmul_vf_vf_vf(vf2getx_vf_vf2(d), vcast_vf_f(1.0f/0.75f))));
+#endif
+  m = dfscale_vf2_vf2_vf(d, vpow2i_vf_vi2(vneg_vi2_vi2(e)));
+
+  x = dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf2_vf(m, vcast_vf_f(-1)), dfadd2_vf2_vf2_vf(m, vcast_vf_f(1)));
+  x2 = dfsqu_vf2_vf2(x);
+
+  t = vcast_vf_f(0.2392828464508056640625f);
+  t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(x2), vcast_vf_f(0.28518211841583251953125f));
+  t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(x2), vcast_vf_f(0.400005877017974853515625f));
+  t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(x2), vcast_vf_f(0.666666686534881591796875f));
+
+  s = dfmul_vf2_vf2_vf(vcast_vf2_vf_vf(vcast_vf_f(0.69314718246459960938f), vcast_vf_f(-1.904654323148236017e-09f)), vcast_vf_vi2(e));
+  s = dfadd_vf2_vf2_vf2(s, dfscale_vf2_vf2_vf(x, vcast_vf_f(2)));
+  s = dfadd_vf2_vf2_vf2(s, dfmul_vf2_vf2_vf(dfmul_vf2_vf2_vf2(x2, x), t));
+
+  return s;
+}
+
+#if !defined(DETERMINISTIC)
+EXPORT CONST VECTOR_CC vfloat xasinhf(vfloat x) {
+  vfloat y = vabs_vf_vf(x);
+  vopmask o = vgt_vo_vf_vf(y, vcast_vf_f(1));
+  vfloat2 d;
+
+  d = vsel_vf2_vo_vf2_vf2(o, dfrec_vf2_vf(x), vcast_vf2_vf_vf(y, vcast_vf_f(0)));
+  d = dfsqrt_vf2_vf2(dfadd2_vf2_vf2_vf(dfsqu_vf2_vf2(d), vcast_vf_f(1)));
+  d = vsel_vf2_vo_vf2_vf2(o, dfmul_vf2_vf2_vf(d, y), d);
+
+  d = logk2f(dfnormalize_vf2_vf2(dfadd2_vf2_vf2_vf(d, x)));
+  y = vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d));
+
+  y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(SQRT_FLT_MAX)),
+                                    visnan_vo_vf(y)),
+                       vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), x), y);
+  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y)));
+  y = vsel_vf_vo_vf_vf(visnegzero_vo_vf(x), vcast_vf_f(-0.0), y);
+
+  return y;
+}
+
+EXPORT CONST VECTOR_CC vfloat xacoshf(vfloat x) {
+  vfloat2 d = logk2f(dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf2(dfsqrt_vf2_vf2(dfadd2_vf2_vf_vf(x, vcast_vf_f(1))), dfsqrt_vf2_vf2(dfadd2_vf2_vf_vf(x, vcast_vf_f(-1)))), x));
+  vfloat y = vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d));
+
+  y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(SQRT_FLT_MAX)),
+                                    visnan_vo_vf(y)),
+                       vcast_vf_f(SLEEF_INFINITYf), y);
+
+  y = vreinterpret_vf_vm(vandnot_vm_vo32_vm(veq_vo_vf_vf(x, vcast_vf_f(1.0f)), vreinterpret_vm_vf(y)));
+
+  y = vreinterpret_vf_vm(vor_vm_vo32_vm(vlt_vo_vf_vf(x, vcast_vf_f(1.0f)), vreinterpret_vm_vf(y)));
+  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y)));
+
+  return y;
+}
+
+EXPORT CONST VECTOR_CC vfloat xatanhf(vfloat x) {
+  vfloat y = vabs_vf_vf(x);
+  vfloat2 d = logk2f(dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf_vf(vcast_vf_f(1), y), dfadd2_vf2_vf_vf(vcast_vf_f(1), vneg_vf_vf(y))));
+  y = vreinterpret_vf_vm(vor_vm_vo32_vm(vgt_vo_vf_vf(y, vcast_vf_f(1.0)), vreinterpret_vm_vf(vsel_vf_vo_vf_vf(veq_vo_vf_vf(y, vcast_vf_f(1.0)), vcast_vf_f(SLEEF_INFINITYf), vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d)), vcast_vf_f(0.5))))));
+
+  y = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(x), visnan_vo_vf(y)), vreinterpret_vm_vf(y)));
+  y = vmulsign_vf_vf_vf(y, x);
+  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y)));
+
+  return y;
+}
+#endif // #if !defined(DETERMINISTIC)
+
+#if !defined(DETERMINISTIC)
+EXPORT CONST VECTOR_CC vfloat xexp2f(vfloat d) {
+  vfloat u = vrint_vf_vf(d), s;
+  vint2 q = vrint_vi2_vf(u);
+
+  s = vsub_vf_vf_vf(d, u);
+
+  u = vcast_vf_f(+0.1535920892e-3);
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.1339262701e-2));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.9618384764e-2));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.5550347269e-1));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2402264476e+0));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.6931471825e+0));
+
+#ifdef ENABLE_FMA_SP
+  u = vfma_vf_vf_vf_vf(u, s, vcast_vf_f(1));
+#else
+  u = vf2getx_vf_vf2(dfnormalize_vf2_vf2(dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf_vf(u, s))));
+#endif
+
+  u = vldexp2_vf_vf_vi2(u, q);
+
+  u = vsel_vf_vo_vf_vf(vge_vo_vf_vf(d, vcast_vf_f(128)), vcast_vf_f(SLEEF_INFINITY), u);
+  u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d, vcast_vf_f(-150)), vreinterpret_vm_vf(u)));
+
+  return u;
+}
+
+EXPORT CONST VECTOR_CC vfloat xexp2f_u35(vfloat d) {
+  vfloat u = vrint_vf_vf(d), s;
+  vint2 q = vrint_vi2_vf(u);
+
+  s = vsub_vf_vf_vf(d, u);
+
+  u = vcast_vf_f(+0.1535920892e-3);
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.1339262701e-2));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.9618384764e-2));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.5550347269e-1));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2402264476e+0));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.6931471825e+0));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.1000000000e+1));
+
+  u = vldexp2_vf_vf_vi2(u, q);
+
+  u = vsel_vf_vo_vf_vf(vge_vo_vf_vf(d, vcast_vf_f(128)), vcast_vf_f(SLEEF_INFINITY), u);
+  u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d, vcast_vf_f(-150)), vreinterpret_vm_vf(u)));
+
+  return u;
+}
+
+EXPORT CONST VECTOR_CC vfloat xexp10f(vfloat d) {
+  vfloat u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(LOG10_2))), s;
+  vint2 q = vrint_vi2_vf(u);
+
+  s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-L10Uf), d);
+  s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-L10Lf), s);
+
+  u = vcast_vf_f(+0.6802555919e-1);
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2078080326e+0));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.5393903852e+0));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.1171245337e+1));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2034678698e+1));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2650949001e+1));
+  vfloat2 x = dfadd_vf2_vf2_vf(vcast_vf2_f_f(2.3025851249694824219, -3.1705172516493593157e-08), vmul_vf_vf_vf(u, s));
+  u = vf2getx_vf_vf2(dfnormalize_vf2_vf2(dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf(x, s))));
+
+  u = vldexp2_vf_vf_vi2(u, q);
+
+  u = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(d, vcast_vf_f(38.5318394191036238941387f)), vcast_vf_f(SLEEF_INFINITYf), u);
+  u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d, vcast_vf_f(-50)), vreinterpret_vm_vf(u)));
+
+  return u;
+}
+
+EXPORT CONST VECTOR_CC vfloat xexp10f_u35(vfloat d) {
+  vfloat u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(LOG10_2))), s;
+  vint2 q = vrint_vi2_vf(u);
+
+  s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-L10Uf), d);
+  s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-L10Lf), s);
+
+  u = vcast_vf_f(+0.2064004987e+0);
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.5417877436e+0));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.1171286821e+1));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2034656048e+1));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2650948763e+1));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2302585125e+1));
+  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.1000000000e+1));
+
+  u = vldexp2_vf_vf_vi2(u, q);
+
+  u = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(d, vcast_vf_f(38.5318394191036238941387f)), vcast_vf_f(SLEEF_INFINITYf), u);
+  u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d, vcast_vf_f(-50)), vreinterpret_vm_vf(u)));
+
+  return u;
+}
+
+EXPORT CONST VECTOR_CC vfloat xexpm1f(vfloat a) {
+  vfloat2 d = dfadd2_vf2_vf2_vf(expk2f(vcast_vf2_vf_vf(a, vcast_vf_f(0))), vcast_vf_f(-1.0));
+  vfloat x = vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d));
+  x = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(a, vcast_vf_f(88.72283172607421875f)), vcast_vf_f(SLEEF_INFINITYf), x);
+  x = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(a, vcast_vf_f(-16.635532333438687426013570f)), vcast_vf_f(-1), x);
+  x = vsel_vf_vo_vf_vf(visnegzero_vo_vf(a), vcast_vf_f(-0.0f), x);
+  return x;
+}
+#endif // #if !defined(DETERMINISTIC)
+
+#if !defined(DETERMINISTIC)
+EXPORT CONST VECTOR_CC vfloat xlog10f(vfloat d) {
+  vfloat2 x;
+  vfloat t, m, x2;
+
+#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
+  vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN));
+  d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32))), d);
+  vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0/0.75)));
+  m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e));
+  e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e);
+#else
+  vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0/0.75)));
+  e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e);
+  m = vgetmant_vf_vf(d);
+#endif
+
+  x = dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf_vf(vcast_vf_f(-1), m), dfadd2_vf2_vf_vf(vcast_vf_f(1), m));
+  x2 = vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x));
+
+  t = vcast_vf_f(+0.1314289868e+0);
+  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f( +0.1735493541e+0));
+  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f( +0.2895309627e+0));
+
+#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
+  vfloat2 s =
dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.30103001, -1.432098889e-08), vcast_vf_vi2(e)); +#else + vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.30103001, -1.432098889e-08), e); +#endif + + s = dfadd_vf2_vf2_vf2(s, dfmul_vf2_vf2_vf2(x, vcast_vf2_f_f(0.868588984, -2.170757285e-08))); + s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(vmul_vf_vf_vf(x2, vf2getx_vf_vf2(x)), t)); + + vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(s), vf2gety_vf_vf2(s)); + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + r = vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITY), r); + r = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(d, vcast_vf_f(0)), visnan_vo_vf(d)), vcast_vf_f(SLEEF_NAN), r); + r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(-SLEEF_INFINITY), r); +#else + r = vfixup_vf_vf_vf_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0); +#endif + + return r; +} + +EXPORT CONST VECTOR_CC vfloat xlog2f(vfloat d) { + vfloat2 x; + vfloat t, m, x2; + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN)); + d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32))), d); + vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0/0.75))); + m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e)); + e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e); +#else + vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0/0.75))); + e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e); + m = vgetmant_vf_vf(d); +#endif + + x = dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf_vf(vcast_vf_f(-1), m), dfadd2_vf2_vf_vf(vcast_vf_f(1), m)); + x2 = vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x)); + + t = vcast_vf_f(+0.4374550283e+0f); + t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.5764790177e+0f)); + t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.9618012905120f)); + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + vfloat2 s = dfadd2_vf2_vf_vf2(vcast_vf_vi2(e), + dfmul_vf2_vf2_vf2(x, vcast_vf2_f_f(2.8853900432586669922, 3.2734474483568488616e-08))); +#else + vfloat2 s = dfadd2_vf2_vf_vf2(e, + dfmul_vf2_vf2_vf2(x, vcast_vf2_f_f(2.8853900432586669922, 3.2734474483568488616e-08))); +#endif + + s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(vmul_vf_vf_vf(x2, vf2getx_vf_vf2(x)), t)); + + vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(s), vf2gety_vf_vf2(s)); + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + r = vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITY), r); + r = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(d, vcast_vf_f(0)), visnan_vo_vf(d)), vcast_vf_f(SLEEF_NAN), r); + r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(-SLEEF_INFINITY), r); +#else + r = vfixup_vf_vf_vf_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0); +#endif + + return r; +} + +EXPORT CONST VECTOR_CC vfloat xlog2f_u35(vfloat d) { + vfloat m, t, x, x2; + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN)); + d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32))), d); + vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0/0.75))); + m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e)); + e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e); +#else + vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0/0.75))); + e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e); + m = vgetmant_vf_vf(d); +#endif + + x 
= vdiv_vf_vf_vf(vsub_vf_vf_vf(m, vcast_vf_f(1)), vadd_vf_vf_vf(m, vcast_vf_f(1))); + x2 = vmul_vf_vf_vf(x, x); + + t = vcast_vf_f(+0.4374088347e+0); + t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.5764843822e+0)); + t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.9618024230e+0)); + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + vfloat r = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(x2, x), t, + vmla_vf_vf_vf_vf(x, vcast_vf_f(+0.2885390043e+1), vcast_vf_vi2(e))); + + r = vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITY), r); + r = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(d, vcast_vf_f(0)), visnan_vo_vf(d)), vcast_vf_f(SLEEF_NAN), r); + r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(-SLEEF_INFINITY), r); +#else + vfloat r = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(x2, x), t, + vmla_vf_vf_vf_vf(x, vcast_vf_f(+0.2885390043e+1), e)); + + r = vfixup_vf_vf_vf_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0); +#endif + + return r; +} + +EXPORT CONST VECTOR_CC vfloat xlog1pf(vfloat d) { + vfloat2 x; + vfloat t, m, x2; + + vfloat dp1 = vadd_vf_vf_vf(d, vcast_vf_f(1)); + +#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) + vopmask o = vlt_vo_vf_vf(dp1, vcast_vf_f(FLT_MIN)); + dp1 = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(dp1, vcast_vf_f((float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32))), dp1); + vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(dp1, vcast_vf_f(1.0f/0.75f))); + t = vldexp3_vf_vf_vi2(vcast_vf_f(1), vneg_vi2_vi2(e)); + m = vmla_vf_vf_vf_vf(d, t, vsub_vf_vf_vf(t, vcast_vf_f(1))); + e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e); + vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), vcast_vf_vi2(e)); +#else + vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(dp1, vcast_vf_f(1.0f/0.75f))); + e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e); + t = vldexp3_vf_vf_vi2(vcast_vf_f(1), vneg_vi2_vi2(vrint_vi2_vf(e))); + m = vmla_vf_vf_vf_vf(d, t, vsub_vf_vf_vf(t, vcast_vf_f(1))); + vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), e); +#endif + + x = dfdiv_vf2_vf2_vf2(vcast_vf2_vf_vf(m, vcast_vf_f(0)), dfadd_vf2_vf_vf(vcast_vf_f(2), m)); + x2 = vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x)); + + t = vcast_vf_f(+0.3027294874e+0f); + t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.3996108174e+0f)); + t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.6666694880e+0f)); + + s = dfadd_vf2_vf2_vf2(s, dfscale_vf2_vf2_vf(x, vcast_vf_f(2))); + s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(vmul_vf_vf_vf(x2, vf2getx_vf_vf2(x)), t)); + + vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(s), vf2gety_vf_vf2(s)); + + r = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(d, vcast_vf_f(1e+38)), vcast_vf_f(SLEEF_INFINITYf), r); + r = vreinterpret_vf_vm(vor_vm_vo32_vm(vgt_vo_vf_vf(vcast_vf_f(-1), d), vreinterpret_vm_vf(r))); + r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(-1)), vcast_vf_f(-SLEEF_INFINITYf), r); + r = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), r); + + return r; +} +#endif // #if !defined(DETERMINISTIC) + +// + +#if !defined(DETERMINISTIC) +EXPORT CONST VECTOR_CC vfloat xfabsf(vfloat x) { return vabs_vf_vf(x); } + +EXPORT CONST VECTOR_CC vfloat xcopysignf(vfloat x, vfloat y) { return vcopysign_vf_vf_vf(x, y); } + +EXPORT CONST VECTOR_CC vfloat xfmaxf(vfloat x, vfloat y) { +#if (defined(__x86_64__) || defined(__i386__)) && !defined(ENABLE_VECEXT) && !defined(ENABLE_PUREC) + return vsel_vf_vo_vf_vf(visnan_vo_vf(y), x, vmax_vf_vf_vf(x, y)); +#else + return 
vsel_vf_vo_vf_vf(visnan_vo_vf(y), x, vsel_vf_vo_vf_vf(vgt_vo_vf_vf(x, y), x, y)); +#endif +} + +EXPORT CONST VECTOR_CC vfloat xfminf(vfloat x, vfloat y) { +#if (defined(__x86_64__) || defined(__i386__)) && !defined(ENABLE_VECEXT) && !defined(ENABLE_PUREC) + return vsel_vf_vo_vf_vf(visnan_vo_vf(y), x, vmin_vf_vf_vf(x, y)); +#else + return vsel_vf_vo_vf_vf(visnan_vo_vf(y), x, vsel_vf_vo_vf_vf(vgt_vo_vf_vf(y, x), x, y)); +#endif +} + +EXPORT CONST VECTOR_CC vfloat xfdimf(vfloat x, vfloat y) { + vfloat ret = vsub_vf_vf_vf(x, y); + ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(ret, vcast_vf_f(0)), veq_vo_vf_vf(x, y)), vcast_vf_f(0), ret); + return ret; +} + +EXPORT CONST VECTOR_CC vfloat xtruncf(vfloat x) { +#ifdef FULL_FP_ROUNDING + return vtruncate_vf_vf(x); +#else + vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x))); + return vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), vge_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(INT64_C(1) << 23))), x, vcopysign_vf_vf_vf(vsub_vf_vf_vf(x, fr), x)); +#endif +} + +EXPORT CONST VECTOR_CC vfloat xfloorf(vfloat x) { + vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x))); + fr = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(fr, vcast_vf_f(0)), vadd_vf_vf_vf(fr, vcast_vf_f(1.0f)), fr); + return vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), vge_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(INT64_C(1) << 23))), x, vcopysign_vf_vf_vf(vsub_vf_vf_vf(x, fr), x)); +} + +EXPORT CONST VECTOR_CC vfloat xceilf(vfloat x) { + vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x))); + fr = vsel_vf_vo_vf_vf(vle_vo_vf_vf(fr, vcast_vf_f(0)), fr, vsub_vf_vf_vf(fr, vcast_vf_f(1.0f))); + return vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), vge_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(INT64_C(1) << 23))), x, vcopysign_vf_vf_vf(vsub_vf_vf_vf(x, fr), x)); +} + +EXPORT CONST VECTOR_CC vfloat xroundf(vfloat d) { + vfloat x = vadd_vf_vf_vf(d, vcast_vf_f(0.5f)); + vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x))); + x = vsel_vf_vo_vf_vf(vand_vo_vo_vo(vle_vo_vf_vf(x, vcast_vf_f(0)), veq_vo_vf_vf(fr, vcast_vf_f(0))), vsub_vf_vf_vf(x, vcast_vf_f(1.0f)), x); + fr = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(fr, vcast_vf_f(0)), vadd_vf_vf_vf(fr, vcast_vf_f(1.0f)), fr); + x = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0.4999999701976776123f)), vcast_vf_f(0), x); + return vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(d), vge_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(INT64_C(1) << 23))), d, vcopysign_vf_vf_vf(vsub_vf_vf_vf(x, fr), d)); +} + +EXPORT CONST VECTOR_CC vfloat xrintf(vfloat d) { +#ifdef FULL_FP_ROUNDING + return vrint_vf_vf(d); +#else + vfloat c = vmulsign_vf_vf_vf(vcast_vf_f(1 << 23), d); + return vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1 << 23)), + d, vorsign_vf_vf_vf(vsub_vf_vf_vf(vadd_vf_vf_vf(d, c), c), d)); +#endif +} + +EXPORT CONST VECTOR_CC vfloat xfmaf(vfloat x, vfloat y, vfloat z) { +#ifdef ENABLE_FMA_SP + return vfma_vf_vf_vf_vf(x, y, z); +#else + vfloat h2 = vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z), q = vcast_vf_f(1); + vopmask o = vlt_vo_vf_vf(vabs_vf_vf(h2), vcast_vf_f(1e-38f)); + { + const float c0 = UINT64_C(1) << 25, c1 = c0 * c0, c2 = c1 * c1; + x = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(x, vcast_vf_f(c1)), x); + y = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(y, vcast_vf_f(c1)), y); + z = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(z, vcast_vf_f(c2)), z); + q = vsel_vf_vo_vf_vf(o, vcast_vf_f(1.0f / c2), q); + } + o = vgt_vo_vf_vf(vabs_vf_vf(h2), vcast_vf_f(1e+38f)); + { + const float c0 = UINT64_C(1) << 25, c1 = c0 * c0, c2 = c1 * c1; + x = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(x, 
vcast_vf_f(1.0f / c1)), x); + y = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(y, vcast_vf_f(1.0f / c1)), y); + z = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(z, vcast_vf_f(1.0f / c2)), z); + q = vsel_vf_vo_vf_vf(o, vcast_vf_f(c2), q); + } + vfloat2 d = dfmul_vf2_vf_vf(x, y); + d = dfadd2_vf2_vf2_vf(d, z); + vfloat ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(veq_vo_vf_vf(x, vcast_vf_f(0)), veq_vo_vf_vf(y, vcast_vf_f(0))), z, vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d))); + o = visinf_vo_vf(z); + o = vandnot_vo_vo_vo(visinf_vo_vf(x), o); + o = vandnot_vo_vo_vo(visnan_vo_vf(x), o); + o = vandnot_vo_vo_vo(visinf_vo_vf(y), o); + o = vandnot_vo_vo_vo(visnan_vo_vf(y), o); + h2 = vsel_vf_vo_vf_vf(o, z, h2); + + o = vor_vo_vo_vo(visinf_vo_vf(h2), visnan_vo_vf(h2)); + + return vsel_vf_vo_vf_vf(o, h2, vmul_vf_vf_vf(ret, q)); +#endif +} +#endif // #if !defined(DETERMINISTIC) + +#if !defined(SLEEF_GENHEADER) +static INLINE CONST VECTOR_CC vint2 vcast_vi2_i_i(int i0, int i1) { return vcast_vi2_vm(vcast_vm_i_i(i0, i1)); } +#endif + +SQRTFU05_FUNCATR VECTOR_CC vfloat xsqrtf_u05(vfloat d) { +#if defined(ENABLE_FMA_SP) + vfloat q, w, x, y, z; + + d = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(SLEEF_NANf), d); + + vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(5.2939559203393770e-23f)); + d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f(1.8889465931478580e+22f)), d); + q = vsel_vf_vo_vf_vf(o, vcast_vf_f(7.2759576141834260e-12f), vcast_vf_f(1.0f)); + + y = vreinterpret_vf_vi2(vsub_vi2_vi2_vi2(vcast_vi2_i(0x5f3759df), vsrl_vi2_vi2_i(vreinterpret_vi2_vf(d), 1))); + + x = vmul_vf_vf_vf(d, y); w = vmul_vf_vf_vf(vcast_vf_f(0.5), y); + y = vfmanp_vf_vf_vf_vf(x, w, vcast_vf_f(0.5)); + x = vfma_vf_vf_vf_vf(x, y, x); w = vfma_vf_vf_vf_vf(w, y, w); + y = vfmanp_vf_vf_vf_vf(x, w, vcast_vf_f(0.5)); + x = vfma_vf_vf_vf_vf(x, y, x); w = vfma_vf_vf_vf_vf(w, y, w); + + y = vfmanp_vf_vf_vf_vf(x, w, vcast_vf_f(1.5)); w = vadd_vf_vf_vf(w, w); + w = vmul_vf_vf_vf(w, y); + x = vmul_vf_vf_vf(w, d); + y = vfmapn_vf_vf_vf_vf(w, d, x); z = vfmanp_vf_vf_vf_vf(w, x, vcast_vf_f(1)); + + z = vfmanp_vf_vf_vf_vf(w, y, z); w = vmul_vf_vf_vf(vcast_vf_f(0.5), x); + w = vfma_vf_vf_vf_vf(w, z, y); + w = vadd_vf_vf_vf(w, x); + + w = vmul_vf_vf_vf(w, q); + + w = vsel_vf_vo_vf_vf(vor_vo_vo_vo(veq_vo_vf_vf(d, vcast_vf_f(0)), + veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf))), d, w); + + w = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(SLEEF_NANf), w); + + return w; +#else + vfloat q; + vopmask o; + + d = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(SLEEF_NANf), d); + + o = vlt_vo_vf_vf(d, vcast_vf_f(5.2939559203393770e-23f)); + d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f(1.8889465931478580e+22f)), d); + q = vsel_vf_vo_vf_vf(o, vcast_vf_f(7.2759576141834260e-12f*0.5f), vcast_vf_f(0.5f)); + + o = vgt_vo_vf_vf(d, vcast_vf_f(1.8446744073709552e+19f)); + d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f(5.4210108624275220e-20f)), d); + q = vsel_vf_vo_vf_vf(o, vcast_vf_f(4294967296.0f * 0.5f), q); + + vfloat x = vreinterpret_vf_vi2(vsub_vi2_vi2_vi2(vcast_vi2_i(0x5f375a86), vsrl_vi2_vi2_i(vreinterpret_vi2_vf(vadd_vf_vf_vf(d, vcast_vf_f(1e-45f))), 1))); + + x = vmul_vf_vf_vf(x, vsub_vf_vf_vf(vcast_vf_f(1.5f), vmul_vf_vf_vf(vmul_vf_vf_vf(vmul_vf_vf_vf(vcast_vf_f(0.5f), d), x), x))); + x = vmul_vf_vf_vf(x, vsub_vf_vf_vf(vcast_vf_f(1.5f), vmul_vf_vf_vf(vmul_vf_vf_vf(vmul_vf_vf_vf(vcast_vf_f(0.5f), d), x), x))); + x = vmul_vf_vf_vf(x, vsub_vf_vf_vf(vcast_vf_f(1.5f), vmul_vf_vf_vf(vmul_vf_vf_vf(vmul_vf_vf_vf(vcast_vf_f(0.5f), d), 
x), x))); + x = vmul_vf_vf_vf(x, d); + + vfloat2 d2 = dfmul_vf2_vf2_vf2(dfadd2_vf2_vf_vf2(d, dfmul_vf2_vf_vf(x, x)), dfrec_vf2_vf(x)); + + x = vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d2), vf2gety_vf_vf2(d2)), q); + + x = vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITYf), x); + x = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), d, x); + + return x; +#endif +} + +EXPORT CONST VECTOR_CC vfloat xsqrtf(vfloat d) { +#ifdef ACCURATE_SQRT + return vsqrt_vf_vf(d); +#else + // fall back to approximation if ACCURATE_SQRT is undefined + return xsqrtf_u05(d); +#endif +} + +#if !defined(DETERMINISTIC) +EXPORT CONST VECTOR_CC vfloat xhypotf_u05(vfloat x, vfloat y) { + x = vabs_vf_vf(x); + y = vabs_vf_vf(y); + vfloat min = vmin_vf_vf_vf(x, y), n = min; + vfloat max = vmax_vf_vf_vf(x, y), d = max; + + vopmask o = vlt_vo_vf_vf(max, vcast_vf_f(FLT_MIN)); + n = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(n, vcast_vf_f(UINT64_C(1) << 24)), n); + d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f(UINT64_C(1) << 24)), d); + + vfloat2 t = dfdiv_vf2_vf2_vf2(vcast_vf2_vf_vf(n, vcast_vf_f(0)), vcast_vf2_vf_vf(d, vcast_vf_f(0))); + t = dfmul_vf2_vf2_vf(dfsqrt_vf2_vf2(dfadd2_vf2_vf2_vf(dfsqu_vf2_vf2(t), vcast_vf_f(1))), max); + vfloat ret = vadd_vf_vf_vf(vf2getx_vf_vf2(t), vf2gety_vf_vf2(t)); + ret = vsel_vf_vo_vf_vf(visnan_vo_vf(ret), vcast_vf_f(SLEEF_INFINITYf), ret); + ret = vsel_vf_vo_vf_vf(veq_vo_vf_vf(min, vcast_vf_f(0)), max, ret); + ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vcast_vf_f(SLEEF_NANf), ret); + ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(veq_vo_vf_vf(x, vcast_vf_f(SLEEF_INFINITYf)), veq_vo_vf_vf(y, vcast_vf_f(SLEEF_INFINITYf))), vcast_vf_f(SLEEF_INFINITYf), ret); + + return ret; +} + +EXPORT CONST VECTOR_CC vfloat xhypotf_u35(vfloat x, vfloat y) { + x = vabs_vf_vf(x); + y = vabs_vf_vf(y); + vfloat min = vmin_vf_vf_vf(x, y), n = min; + vfloat max = vmax_vf_vf_vf(x, y), d = max; + + vfloat t = vdiv_vf_vf_vf(min, max); + vfloat ret = vmul_vf_vf_vf(max, vsqrt_vf_vf(vmla_vf_vf_vf_vf(t, t, vcast_vf_f(1)))); + ret = vsel_vf_vo_vf_vf(veq_vo_vf_vf(min, vcast_vf_f(0)), max, ret); + ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vcast_vf_f(SLEEF_NANf), ret); + ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(veq_vo_vf_vf(x, vcast_vf_f(SLEEF_INFINITYf)), veq_vo_vf_vf(y, vcast_vf_f(SLEEF_INFINITYf))), vcast_vf_f(SLEEF_INFINITYf), ret); + + return ret; +} + +EXPORT CONST VECTOR_CC vfloat xnextafterf(vfloat x, vfloat y) { + x = vsel_vf_vo_vf_vf(veq_vo_vf_vf(x, vcast_vf_f(0)), vmulsign_vf_vf_vf(vcast_vf_f(0), y), x); + vint2 t, xi2 = vreinterpret_vi2_vf(x); + vopmask c = vxor_vo_vo_vo(vsignbit_vo_vf(x), vge_vo_vf_vf(y, x)); + + xi2 = vsel_vi2_vo_vi2_vi2(c, vsub_vi2_vi2_vi2(vcast_vi2_i(0), vxor_vi2_vi2_vi2(xi2, vcast_vi2_i(1 << 31))), xi2); + + xi2 = vsel_vi2_vo_vi2_vi2(vneq_vo_vf_vf(x, y), vsub_vi2_vi2_vi2(xi2, vcast_vi2_i(1)), xi2); + + xi2 = vsel_vi2_vo_vi2_vi2(c, vsub_vi2_vi2_vi2(vcast_vi2_i(0), vxor_vi2_vi2_vi2(xi2, vcast_vi2_i(1 << 31))), xi2); + + vfloat ret = vreinterpret_vf_vi2(xi2); + + ret = vsel_vf_vo_vf_vf(vand_vo_vo_vo(veq_vo_vf_vf(ret, vcast_vf_f(0)), vneq_vo_vf_vf(x, vcast_vf_f(0))), + vmulsign_vf_vf_vf(vcast_vf_f(0), x), ret); + + ret = vsel_vf_vo_vf_vf(vand_vo_vo_vo(veq_vo_vf_vf(x, vcast_vf_f(0)), veq_vo_vf_vf(y, vcast_vf_f(0))), y, ret); + + ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vcast_vf_f(SLEEF_NANf), ret); + + return ret; +} + +EXPORT CONST VECTOR_CC vfloat xfrfrexpf(vfloat x) { + x = 
vsel_vf_vo_vf_vf(vlt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(FLT_MIN)), vmul_vf_vf_vf(x, vcast_vf_f(UINT64_C(1) << 30)), x); + + vmask xm = vreinterpret_vm_vf(x); + xm = vand_vm_vm_vm(xm, vcast_vm_i_i(~0x7f800000U, ~0x7f800000U)); + xm = vor_vm_vm_vm (xm, vcast_vm_i_i( 0x3f000000U, 0x3f000000U)); + + vfloat ret = vreinterpret_vf_vm(xm); + + ret = vsel_vf_vo_vf_vf(visinf_vo_vf(x), vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), x), ret); + ret = vsel_vf_vo_vf_vf(veq_vo_vf_vf(x, vcast_vf_f(0)), x, ret); + + return ret; +} +#endif // #if !defined(DETERMINISTIC) + +EXPORT CONST VECTOR_CC vint2 xexpfrexpf(vfloat x) { + /* + x = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(FLT_MIN)), vmul_vf_vf_vf(x, vcast_vf_f(UINT64_C(1) << 63)), x); + + vint ret = vcastu_vi_vi2(vreinterpret_vi2_vf(x)); + ret = vsub_vi_vi_vi(vand_vi_vi_vi(vsrl_vi_vi_i(ret, 20), vcast_vi_i(0x7ff)), vcast_vi_i(0x3fe)); + + ret = vsel_vi_vo_vi_vi(vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vf_vf(x, vcast_vf_f(0)), visnan_vo_vf(x)), visinf_vo_vf(x)), vcast_vi_i(0), ret); + + return ret; + */ + return vcast_vi2_i(0); +} + +static INLINE CONST VECTOR_CC vfloat vtoward0f(vfloat x) { + vfloat t = vreinterpret_vf_vi2(vsub_vi2_vi2_vi2(vreinterpret_vi2_vf(x), vcast_vi2_i(1))); + return vsel_vf_vo_vf_vf(veq_vo_vf_vf(x, vcast_vf_f(0)), vcast_vf_f(0), t); +} + +static INLINE CONST VECTOR_CC vfloat vptruncf(vfloat x) { +#ifdef FULL_FP_ROUNDING + return vtruncate_vf_vf(x); +#else + vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x))); + return vsel_vf_vo_vf_vf(vge_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(INT64_C(1) << 23)), x, vsub_vf_vf_vf(x, fr)); +#endif +} + +#if !defined(DETERMINISTIC) +EXPORT CONST VECTOR_CC vfloat xfmodf(vfloat x, vfloat y) { + vfloat nu = vabs_vf_vf(x), de = vabs_vf_vf(y), s = vcast_vf_f(1), q; + vopmask o = vlt_vo_vf_vf(de, vcast_vf_f(FLT_MIN)); + nu = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(nu, vcast_vf_f(UINT64_C(1) << 25)), nu); + de = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(de, vcast_vf_f(UINT64_C(1) << 25)), de); + s = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(s , vcast_vf_f(1.0f / (UINT64_C(1) << 25))), s); + vfloat rde = vtoward0f(vrec_vf_vf(de)); +#if defined(ENABLE_NEON32) || defined(ENABLE_NEON32VFPV4) + rde = vtoward0f(rde); +#endif + vfloat2 r = vcast_vf2_vf_vf(nu, vcast_vf_f(0)); + + for(int i=0;i<8;i++) { // ceil(log2(FLT_MAX) / 22)+1 + q = vptruncf(vmul_vf_vf_vf(vtoward0f(vf2getx_vf_vf2(r)), rde)); + q = vsel_vf_vo_vf_vf(vand_vo_vo_vo(vgt_vo_vf_vf(vmul_vf_vf_vf(vcast_vf_f(3), de), vf2getx_vf_vf2(r)), + vge_vo_vf_vf(vf2getx_vf_vf2(r), de)), + vcast_vf_f(2), q); + q = vsel_vf_vo_vf_vf(vand_vo_vo_vo(vgt_vo_vf_vf(vmul_vf_vf_vf(vcast_vf_f(2), de), vf2getx_vf_vf2(r)), + vge_vo_vf_vf(vf2getx_vf_vf2(r), de)), + vcast_vf_f(1), q); + r = dfnormalize_vf2_vf2(dfadd2_vf2_vf2_vf2(r, dfmul_vf2_vf_vf(vptruncf(q), vneg_vf_vf(de)))); + if (vtestallones_i_vo32(vlt_vo_vf_vf(vf2getx_vf_vf2(r), de))) break; + } + + vfloat ret = vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(r), vf2gety_vf_vf2(r)), s); + ret = vsel_vf_vo_vf_vf(veq_vo_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(r), vf2gety_vf_vf2(r)), de), vcast_vf_f(0), ret); + + ret = vmulsign_vf_vf_vf(ret, x); + + ret = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(nu, de), x, ret); + ret = vsel_vf_vo_vf_vf(veq_vo_vf_vf(de, vcast_vf_f(0)), vcast_vf_f(SLEEF_NANf), ret); + + return ret; +} + +static INLINE CONST VECTOR_CC vfloat vrintfk2_vf_vf(vfloat d) { +#ifdef FULL_FP_ROUNDING + return vrint_vf_vf(d); +#else + vfloat c = vmulsign_vf_vf_vf(vcast_vf_f(1 << 23), d); + return vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vabs_vf_vf(d), 
vcast_vf_f(1 << 23)), + d, vorsign_vf_vf_vf(vsub_vf_vf_vf(vadd_vf_vf_vf(d, c), c), d)); +#endif +} + +EXPORT CONST VECTOR_CC vfloat xremainderf(vfloat x, vfloat y) { + vfloat n = vabs_vf_vf(x), d = vabs_vf_vf(y), s = vcast_vf_f(1), q; + vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN*2)); + n = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(n, vcast_vf_f(UINT64_C(1) << 25)), n); + d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f(UINT64_C(1) << 25)), d); + s = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(s , vcast_vf_f(1.0f / (UINT64_C(1) << 25))), s); + vfloat2 r = vcast_vf2_vf_vf(n, vcast_vf_f(0)); + vfloat rd = vrec_vf_vf(d); + vopmask qisodd = vneq_vo_vf_vf(vcast_vf_f(0), vcast_vf_f(0)); + + for(int i=0;i<8;i++) { // ceil(log2(FLT_MAX) / 22)+1 + q = vrintfk2_vf_vf(vmul_vf_vf_vf(vf2getx_vf_vf2(r), rd)); + q = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(vabs_vf_vf(vf2getx_vf_vf2(r)), vmul_vf_vf_vf(d, vcast_vf_f(1.5f))), vmulsign_vf_vf_vf(vcast_vf_f(1.0f), vf2getx_vf_vf2(r)), q); + q = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(vabs_vf_vf(vf2getx_vf_vf2(r)), vmul_vf_vf_vf(d, vcast_vf_f(0.5f))), + vandnot_vo_vo_vo(qisodd, veq_vo_vf_vf(vabs_vf_vf(vf2getx_vf_vf2(r)), vmul_vf_vf_vf(d, vcast_vf_f(0.5f))))), + vcast_vf_f(0.0), q); + if (vtestallones_i_vo32(veq_vo_vf_vf(q, vcast_vf_f(0)))) break; + q = vsel_vf_vo_vf_vf(visinf_vo_vf(vmul_vf_vf_vf(q, vneg_vf_vf(d))), vadd_vf_vf_vf(q, vmulsign_vf_vf_vf(vcast_vf_f(-1), vf2getx_vf_vf2(r))), q); + qisodd = vxor_vo_vo_vo(qisodd, vand_vo_vo_vo(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vtruncate_vi2_vf(q), vcast_vi2_i(1)), vcast_vi2_i(1)), + vlt_vo_vf_vf(vabs_vf_vf(q), vcast_vf_f(1 << 24)))); + r = dfnormalize_vf2_vf2(dfadd2_vf2_vf2_vf2(r, dfmul_vf2_vf_vf(q, vneg_vf_vf(d)))); + } + + vfloat ret = vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(r), vf2gety_vf_vf2(r)), s); + ret = vmulsign_vf_vf_vf(ret, x); + ret = vsel_vf_vo_vf_vf(visinf_vo_vf(y), vsel_vf_vo_vf_vf(visinf_vo_vf(x), vcast_vf_f(SLEEF_NANf), x), ret); + ret = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(SLEEF_NANf), ret); + return ret; +} +#endif // #if !defined(DETERMINISTIC) + +// + +static INLINE CONST VECTOR_CC vfloat2 sinpifk(vfloat d) { + vopmask o; + vfloat u, s, t; + vfloat2 x, s2; + + u = vmul_vf_vf_vf(d, vcast_vf_f(4.0)); + vint2 q = vtruncate_vi2_vf(u); + q = vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vxor_vi2_vi2_vi2(vsrl_vi2_vi2_i(q, 31), vcast_vi2_i(1))), vcast_vi2_i(~1)); + o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)); + + s = vsub_vf_vf_vf(u, vcast_vf_vi2(q)); + t = s; + s = vmul_vf_vf_vf(s, s); + s2 = dfmul_vf2_vf_vf(t, t); + + // + + u = vsel_vf_vo_f_f(o, -0.2430611801e-7f, +0.3093842054e-6f); + u = vmla_vf_vf_vf_vf(u, s, vsel_vf_vo_f_f(o, +0.3590577080e-5f, -0.3657307388e-4f)); + u = vmla_vf_vf_vf_vf(u, s, vsel_vf_vo_f_f(o, -0.3259917721e-3f, +0.2490393585e-2f)); + x = dfadd2_vf2_vf_vf2(vmul_vf_vf_vf(u, s), + vsel_vf2_vo_f_f_f_f(o, 0.015854343771934509277, 4.4940051354032242811e-10, + -0.080745510756969451904, -1.3373665339076936258e-09)); + x = dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf2(s2, x), + vsel_vf2_vo_f_f_f_f(o, -0.30842512845993041992, -9.0728339030733922277e-09, + 0.78539818525314331055, -2.1857338617566484855e-08)); + + x = dfmul_vf2_vf2_vf2(x, vsel_vf2_vo_vf2_vf2(o, s2, vcast_vf2_vf_vf(t, vcast_vf_f(0)))); + x = vsel_vf2_vo_vf2_vf2(o, dfadd2_vf2_vf2_vf(x, vcast_vf_f(1)), x); + + o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(4)), vcast_vi2_i(4)); + x = vf2setx_vf2_vf2_vf(x, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), 
vreinterpret_vm_vf(vf2getx_vf_vf2(x))))); + x = vf2sety_vf2_vf2_vf(x, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2gety_vf_vf2(x))))); + + return x; +} + +#if !defined(DETERMINISTIC) +EXPORT CONST VECTOR_CC vfloat xsinpif_u05(vfloat d) { + vfloat2 x = sinpifk(d); + vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x)); + + r = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0), r); + r = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vgt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX4f)), vreinterpret_vm_vf(r))); + r = vreinterpret_vf_vm(vor_vm_vo32_vm(visinf_vo_vf(d), vreinterpret_vm_vf(r))); + + return r; +} +#endif // #if !defined(DETERMINISTIC) + +static INLINE CONST VECTOR_CC vfloat2 cospifk(vfloat d) { + vopmask o; + vfloat u, s, t; + vfloat2 x, s2; + + u = vmul_vf_vf_vf(d, vcast_vf_f(4.0)); + vint2 q = vtruncate_vi2_vf(u); + q = vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vxor_vi2_vi2_vi2(vsrl_vi2_vi2_i(q, 31), vcast_vi2_i(1))), vcast_vi2_i(~1)); + o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)); + + s = vsub_vf_vf_vf(u, vcast_vf_vi2(q)); + t = s; + s = vmul_vf_vf_vf(s, s); + s2 = dfmul_vf2_vf_vf(t, t); + + // + + u = vsel_vf_vo_f_f(o, -0.2430611801e-7f, +0.3093842054e-6f); + u = vmla_vf_vf_vf_vf(u, s, vsel_vf_vo_f_f(o, +0.3590577080e-5f, -0.3657307388e-4f)); + u = vmla_vf_vf_vf_vf(u, s, vsel_vf_vo_f_f(o, -0.3259917721e-3f, +0.2490393585e-2f)); + x = dfadd2_vf2_vf_vf2(vmul_vf_vf_vf(u, s), + vsel_vf2_vo_f_f_f_f(o, 0.015854343771934509277, 4.4940051354032242811e-10, + -0.080745510756969451904, -1.3373665339076936258e-09)); + x = dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf2(s2, x), + vsel_vf2_vo_f_f_f_f(o, -0.30842512845993041992, -9.0728339030733922277e-09, + 0.78539818525314331055, -2.1857338617566484855e-08)); + + x = dfmul_vf2_vf2_vf2(x, vsel_vf2_vo_vf2_vf2(o, s2, vcast_vf2_vf_vf(t, vcast_vf_f(0)))); + x = vsel_vf2_vo_vf2_vf2(o, dfadd2_vf2_vf2_vf(x, vcast_vf_f(1)), x); + + o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(4)), vcast_vi2_i(4)); + x = vf2setx_vf2_vf2_vf(x, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2getx_vf_vf2(x))))); + x = vf2sety_vf2_vf2_vf(x, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2gety_vf_vf2(x))))); + + return x; +} + +#if !defined(DETERMINISTIC) +EXPORT CONST VECTOR_CC vfloat xcospif_u05(vfloat d) { + vfloat2 x = cospifk(d); + vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x)); + + r = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX4f)), vcast_vf_f(1), r); + r = vreinterpret_vf_vm(vor_vm_vo32_vm(visinf_vo_vf(d), vreinterpret_vm_vf(r))); + + return r; +} +#endif // #if !defined(DETERMINISTIC) + +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) + typedef struct { + vfloat2 a, b; + } df2; + +static df2 df2setab_df2_vf2_vf2(vfloat2 a, vfloat2 b) { + df2 r = { a, b }; + return r; +} +static vfloat2 df2geta_vf2_df2(df2 d) { return d.a; } +static vfloat2 df2getb_vf2_df2(df2 d) { return d.b; } +#endif + +/* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ +static CONST df2 gammafk(vfloat a) { + vfloat2 clc = vcast_vf2_f_f(0, 0), clln = vcast_vf2_f_f(1, 0), clld = vcast_vf2_f_f(1, 0); + vfloat2 v = vcast_vf2_f_f(1, 0), x, y, z; + vfloat t, u; + + vopmask otiny = vlt_vo_vf_vf(vabs_vf_vf(a), vcast_vf_f(1e-30f)), oref = vlt_vo_vf_vf(a, vcast_vf_f(0.5)); + + x = 
vsel_vf2_vo_vf2_vf2(otiny, vcast_vf2_f_f(0, 0), + vsel_vf2_vo_vf2_vf2(oref, dfadd2_vf2_vf_vf(vcast_vf_f(1), vneg_vf_vf(a)), + vcast_vf2_vf_vf(a, vcast_vf_f(0)))); + + vopmask o0 = vand_vo_vo_vo(vle_vo_vf_vf(vcast_vf_f(0.5), vf2getx_vf_vf2(x)), vle_vo_vf_vf(vf2getx_vf_vf2(x), vcast_vf_f(1.2))); + vopmask o2 = vle_vo_vf_vf(vcast_vf_f(2.3), vf2getx_vf_vf2(x)); + + y = dfnormalize_vf2_vf2(dfmul_vf2_vf2_vf2(dfadd2_vf2_vf2_vf(x, vcast_vf_f(1)), x)); + y = dfnormalize_vf2_vf2(dfmul_vf2_vf2_vf2(dfadd2_vf2_vf2_vf(x, vcast_vf_f(2)), y)); + + vopmask o = vand_vo_vo_vo(o2, vle_vo_vf_vf(vf2getx_vf_vf2(x), vcast_vf_f(7))); + clln = vsel_vf2_vo_vf2_vf2(o, y, clln); + + x = vsel_vf2_vo_vf2_vf2(o, dfadd2_vf2_vf2_vf(x, vcast_vf_f(3)), x); + t = vsel_vf_vo_vf_vf(o2, vrec_vf_vf(vf2getx_vf_vf2(x)), vf2getx_vf_vf2(dfnormalize_vf2_vf2(dfadd2_vf2_vf2_vf(x, vsel_vf_vo_f_f(o0, -1, -2))))); + + u = vsel_vf_vo_vo_f_f_f(o2, o0, +0.000839498720672087279971000786, +0.9435157776e+0f, +0.1102489550e-3f); + u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, -5.17179090826059219329394422e-05, +0.8670063615e+0f, +0.8160019934e-4f)); + u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, -0.000592166437353693882857342347, +0.4826702476e+0f, +0.1528468856e-3f)); + u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, +6.97281375836585777403743539e-05, -0.8855129778e-1f, -0.2355068718e-3f)); + u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, +0.000784039221720066627493314301, +0.1013825238e+0f, +0.4962242092e-3f)); + u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, -0.000229472093621399176949318732, -0.1493408978e+0f, -0.1193488017e-2f)); + u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, -0.002681327160493827160473958490, +0.1697509140e+0f, +0.2891599433e-2f)); + u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, +0.003472222222222222222175164840, -0.2072454542e+0f, -0.7385451812e-2f)); + u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, +0.083333333333333333335592087900, +0.2705872357e+0f, +0.2058077045e-1f)); + + y = dfmul_vf2_vf2_vf2(dfadd2_vf2_vf2_vf(x, vcast_vf_f(-0.5)), logk2f(x)); + y = dfadd2_vf2_vf2_vf2(y, dfneg_vf2_vf2(x)); + y = dfadd2_vf2_vf2_vf2(y, vcast_vf2_d(0.91893853320467278056)); // 0.5*log(2*M_PI) + + z = dfadd2_vf2_vf2_vf(dfmul_vf2_vf_vf (u, t), vsel_vf_vo_f_f(o0, -0.400686534596170958447352690395e+0f, -0.673523028297382446749257758235e-1f)); + z = dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf(z, t), vsel_vf_vo_f_f(o0, +0.822466960142643054450325495997e+0f, +0.322467033928981157743538726901e+0f)); + z = dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf(z, t), vsel_vf_vo_f_f(o0, -0.577215665946766039837398973297e+0f, +0.422784335087484338986941629852e+0f)); + z = dfmul_vf2_vf2_vf(z, t); + + clc = vsel_vf2_vo_vf2_vf2(o2, y, z); + + clld = vsel_vf2_vo_vf2_vf2(o2, dfadd2_vf2_vf2_vf(dfmul_vf2_vf_vf(u, t), vcast_vf_f(1)), clld); + + y = clln; + + clc = vsel_vf2_vo_vf2_vf2(otiny, vcast_vf2_d(41.58883083359671856503), // log(2^60) + vsel_vf2_vo_vf2_vf2(oref, dfadd2_vf2_vf2_vf2(vcast_vf2_d(1.1447298858494001639), dfneg_vf2_vf2(clc)), clc)); // log(M_PI) + clln = vsel_vf2_vo_vf2_vf2(otiny, vcast_vf2_f_f(1, 0), vsel_vf2_vo_vf2_vf2(oref, clln, clld)); + + if (!vtestallones_i_vo32(vnot_vo32_vo32(oref))) { + t = vsub_vf_vf_vf(a, vmul_vf_vf_vf(vcast_vf_f(INT64_C(1) << 12), vcast_vf_vi2(vtruncate_vi2_vf(vmul_vf_vf_vf(a, vcast_vf_f(1.0 / (INT64_C(1) << 12))))))); + x = dfmul_vf2_vf2_vf2(clld, sinpifk(t)); + } + + clld = vsel_vf2_vo_vf2_vf2(otiny, vcast_vf2_vf_vf(vmul_vf_vf_vf(a, vcast_vf_f((INT64_C(1) << 
30)*(float)(INT64_C(1) << 30))), vcast_vf_f(0)), + vsel_vf2_vo_vf2_vf2(oref, x, y)); + + return df2setab_df2_vf2_vf2(clc, dfdiv_vf2_vf2_vf2(clln, clld)); +} + +#if !defined(DETERMINISTIC) +EXPORT CONST VECTOR_CC vfloat xtgammaf_u1(vfloat a) { + df2 d = gammafk(a); + vfloat2 y = dfmul_vf2_vf2_vf2(expk2f(df2geta_vf2_df2(d)), df2getb_vf2_df2(d)); + vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(y), vf2gety_vf_vf2(y)); + vopmask o; + + o = vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vf_vf(a, vcast_vf_f(-SLEEF_INFINITYf)), + vand_vo_vo_vo(vlt_vo_vf_vf(a, vcast_vf_f(0)), visint_vo_vf(a))), + vand_vo_vo_vo(vand_vo_vo_vo(visnumber_vo_vf(a), vlt_vo_vf_vf(a, vcast_vf_f(0))), visnan_vo_vf(r))); + r = vsel_vf_vo_vf_vf(o, vcast_vf_f(SLEEF_NANf), r); + + o = vand_vo_vo_vo(vand_vo_vo_vo(vor_vo_vo_vo(veq_vo_vf_vf(a, vcast_vf_f(SLEEF_INFINITYf)), visnumber_vo_vf(a)), + vge_vo_vf_vf(a, vcast_vf_f(-FLT_MIN))), + vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vf_vf(a, vcast_vf_f(0)), vgt_vo_vf_vf(a, vcast_vf_f(36))), visnan_vo_vf(r))); + r = vsel_vf_vo_vf_vf(o, vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), a), r); + + return r; +} + +EXPORT CONST VECTOR_CC vfloat xlgammaf_u1(vfloat a) { + df2 d = gammafk(a); + vfloat2 y = dfadd2_vf2_vf2_vf2(df2geta_vf2_df2(d), logk2f(dfabs_vf2_vf2(df2getb_vf2_df2(d)))); + vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(y), vf2gety_vf_vf2(y)); + vopmask o; + + o = vor_vo_vo_vo(visinf_vo_vf(a), + vor_vo_vo_vo(vand_vo_vo_vo(vle_vo_vf_vf(a, vcast_vf_f(0)), visint_vo_vf(a)), + vand_vo_vo_vo(visnumber_vo_vf(a), visnan_vo_vf(r)))); + r = vsel_vf_vo_vf_vf(o, vcast_vf_f(SLEEF_INFINITYf), r); + + return r; +} + +/* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ +EXPORT CONST VECTOR_CC vfloat xerff_u1(vfloat a) { + vfloat s = a, t, u; + vfloat2 d; + + a = vabs_vf_vf(a); + vopmask o0 = vlt_vo_vf_vf(a, vcast_vf_f(1.1)); + vopmask o1 = vlt_vo_vf_vf(a, vcast_vf_f(2.4)); + vopmask o2 = vlt_vo_vf_vf(a, vcast_vf_f(4.0)); + u = vsel_vf_vo_vf_vf(o0, vmul_vf_vf_vf(a, a), a); + + t = vsel_vf_vo_vo_f_f_f(o0, o1, +0.7089292194e-4f, -0.1792667899e-4f, -0.9495757695e-5f); + t = vmla_vf_vf_vf_vf(t, u, vsel_vf_vo_vo_f_f_f(o0, o1, -0.7768311189e-3f, +0.3937633010e-3f, +0.2481465926e-3f)); + t = vmla_vf_vf_vf_vf(t, u, vsel_vf_vo_vo_f_f_f(o0, o1, +0.5159463733e-2f, -0.3949181177e-2f, -0.2918176819e-2f)); + t = vmla_vf_vf_vf_vf(t, u, vsel_vf_vo_vo_f_f_f(o0, o1, -0.2683781274e-1f, +0.2445474640e-1f, +0.2059706673e-1f)); + t = vmla_vf_vf_vf_vf(t, u, vsel_vf_vo_vo_f_f_f(o0, o1, +0.1128318012e+0f, -0.1070996150e+0f, -0.9901899844e-1f)); + d = dfmul_vf2_vf_vf(t, u); + d = dfadd2_vf2_vf2_vf2(d, vsel_vf2_vo_vo_d_d_d(o0, o1, -0.376125876000657465175213237214e+0, -0.634588905908410389971210809210e+0, -0.643598050547891613081201721633e+0)); + d = dfmul_vf2_vf2_vf(d, u); + d = dfadd2_vf2_vf2_vf2(d, vsel_vf2_vo_vo_d_d_d(o0, o1, +0.112837916021059138255978217023e+1, -0.112879855826694507209862753992e+1, -0.112461487742845562801052956293e+1)); + d = dfmul_vf2_vf2_vf(d, a); + d = vsel_vf2_vo_vf2_vf2(o0, d, dfadd_vf2_vf_vf2(vcast_vf_f(1.0), dfneg_vf2_vf2(expk2f(d)))); + u = vmulsign_vf_vf_vf(vsel_vf_vo_vf_vf(o2, vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d)), vcast_vf_f(1)), s); + u = vsel_vf_vo_vf_vf(visnan_vo_vf(a), vcast_vf_f(SLEEF_NANf), u); + + return u; +} + +/* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ +EXPORT CONST VECTOR_CC vfloat xerfcf_u15(vfloat a) { + vfloat s = a, r = vcast_vf_f(0), t; + vfloat2 u, d, x; + a = vabs_vf_vf(a); + vopmask o0 = vlt_vo_vf_vf(a, vcast_vf_f(1.0)); + vopmask o1 = 
vlt_vo_vf_vf(a, vcast_vf_f(2.2)); + vopmask o2 = vlt_vo_vf_vf(a, vcast_vf_f(4.3)); + vopmask o3 = vlt_vo_vf_vf(a, vcast_vf_f(10.1)); + + u = vsel_vf2_vo_vf2_vf2(o1, vcast_vf2_vf_vf(a, vcast_vf_f(0)), dfdiv_vf2_vf2_vf2(vcast_vf2_f_f(1, 0), vcast_vf2_vf_vf(a, vcast_vf_f(0)))); + + t = vsel_vf_vo_vo_vo_f_f_f_f(o0, o1, o2, -0.8638041618e-4f, -0.6236977242e-5f, -0.3869504035e+0f, +0.1115344167e+1f); + t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(u), vsel_vf_vo_vo_vo_f_f_f_f(o0, o1, o2, +0.6000166177e-3f, +0.5749821503e-4f, +0.1288077235e+1f, -0.9454904199e+0f)); + t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(u), vsel_vf_vo_vo_vo_f_f_f_f(o0, o1, o2, -0.1665703603e-2f, +0.6002851478e-5f, -0.1816803217e+1f, -0.3667259514e+0f)); + t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(u), vsel_vf_vo_vo_vo_f_f_f_f(o0, o1, o2, +0.1795156277e-3f, -0.2851036377e-2f, +0.1249150872e+1f, +0.7155663371e+0f)); + t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(u), vsel_vf_vo_vo_vo_f_f_f_f(o0, o1, o2, +0.1914106123e-1f, +0.2260518074e-1f, -0.1328857988e+0f, -0.1262947265e-1f)); + + d = dfmul_vf2_vf2_vf(u, t); + d = dfadd2_vf2_vf2_vf2(d, vsel_vf2_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.102775359343930288081655368891e+0, -0.105247583459338632253369014063e+0, -0.482365310333045318680618892669e+0, -0.498961546254537647970305302739e+0)); + d = dfmul_vf2_vf2_vf2(d, u); + d = dfadd2_vf2_vf2_vf2(d, vsel_vf2_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.636619483208481931303752546439e+0, -0.635609463574589034216723775292e+0, -0.134450203224533979217859332703e-2, -0.471199543422848492080722832666e-4)); + d = dfmul_vf2_vf2_vf2(d, u); + d = dfadd2_vf2_vf2_vf2(d, vsel_vf2_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.112837917790537404939545770596e+1, -0.112855987376668622084547028949e+1, -0.572319781150472949561786101080e+0, -0.572364030327966044425932623525e+0)); + + x = dfmul_vf2_vf2_vf(vsel_vf2_vo_vf2_vf2(o1, d, vcast_vf2_vf_vf(vneg_vf_vf(a), vcast_vf_f(0))), a); + x = vsel_vf2_vo_vf2_vf2(o1, x, dfadd2_vf2_vf2_vf2(x, d)); + + x = expk2f(x); + x = vsel_vf2_vo_vf2_vf2(o1, x, dfmul_vf2_vf2_vf2(x, u)); + + r = vsel_vf_vo_vf_vf(o3, vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x)), vcast_vf_f(0)); + r = vsel_vf_vo_vf_vf(vsignbit_vo_vf(s), vsub_vf_vf_vf(vcast_vf_f(2), r), r); + r = vsel_vf_vo_vf_vf(visnan_vo_vf(s), vcast_vf_f(SLEEF_NANf), r); + return r; +} +#endif // #if !defined(DETERMINISTIC) + +#if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) +// See sleefsimddp.c for explanation of these macros + +#ifdef ENABLE_ALIAS +#define DALIAS_vf_vf(FUNC) EXPORT CONST VECTOR_CC vfloat y ## FUNC(vfloat) __attribute__((alias( stringify(x ## FUNC) ))); +#define DALIAS_vf2_vf(FUNC) EXPORT CONST VECTOR_CC vfloat2 y ## FUNC(vfloat) __attribute__((alias( stringify(x ## FUNC) ))); +#define DALIAS_vf_vf_vf(FUNC) EXPORT CONST VECTOR_CC vfloat y ## FUNC(vfloat, vfloat) __attribute__((alias( stringify(x ## FUNC) ))); +#define DALIAS_vf_vf_vf_vf(FUNC) EXPORT CONST VECTOR_CC vfloat y ## FUNC(vfloat, vfloat, vfloat) __attribute__((alias( stringify(x ## FUNC) ))); +#else +#define DALIAS_vf_vf(FUNC) EXPORT CONST VECTOR_CC vfloat y ## FUNC(vfloat d) { return x ## FUNC (d); } +#define DALIAS_vf2_vf(FUNC) EXPORT CONST VECTOR_CC vfloat2 y ## FUNC(vfloat d) { return x ## FUNC (d); } +#define DALIAS_vf_vf_vf(FUNC) EXPORT CONST VECTOR_CC vfloat y ## FUNC(vfloat x, vfloat y) { return x ## FUNC (x, y); } +#define DALIAS_vf_vf_vf_vf(FUNC) EXPORT CONST VECTOR_CC vfloat y ## FUNC(vfloat x, vfloat y, vfloat z) { return x ## FUNC (x, y, z); } +#endif + +/* DALIAS_vf2_vf(sincospif_u05) */ +/* 
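(A sketch of the expansion, assuming the compiler honors the alias attribute: under ENABLE_ALIAS, DALIAS_vf_vf(sinhf) would emit EXPORT CONST VECTOR_CC vfloat ysinhf(vfloat) __attribute__((alias("xsinhf"))); without ENABLE_ALIAS it would emit the forwarding wrapper EXPORT CONST VECTOR_CC vfloat ysinhf(vfloat d) { return xsinhf(d); }, so each y-prefixed name resolves to the corresponding x-prefixed implementation.)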
DALIAS_vf2_vf(sincospif_u35) */ +/* DALIAS_vf2_vf(modff) */ +/* DALIAS_vf_vf(atanf) */ +/* DALIAS_vf_vf_vf(atan2f) */ +/* DALIAS_vf_vf(asinf) */ +/* DALIAS_vf_vf(acosf) */ +/* DALIAS_vf_vf_vf(atan2f_u1) */ +/* DALIAS_vf_vf(asinf_u1) */ +/* DALIAS_vf_vf(acosf_u1) */ +/* DALIAS_vf_vf(atanf_u1) */ +/* DALIAS_vf_vf(logf) */ +/* DALIAS_vf_vf(expf) */ +/* DALIAS_vf_vf(cbrtf) */ +/* DALIAS_vf_vf(cbrtf_u1) */ +/* DALIAS_vf_vf(logf_u1) */ +/* DALIAS_vf_vf_vf(powf) */ +/* DALIAS_vf_vf(sinhf) */ +/* DALIAS_vf_vf(coshf) */ +/* DALIAS_vf_vf(tanhf) */ +/* DALIAS_vf_vf(sinhf_u35) */ +/* DALIAS_vf_vf(coshf_u35) */ +/* DALIAS_vf_vf(tanhf_u35) */ +/* DALIAS_vf_vf(asinhf) */ +/* DALIAS_vf_vf(acoshf) */ +/* DALIAS_vf_vf(atanhf) */ +/* DALIAS_vf_vf(exp2f) */ +/* DALIAS_vf_vf(exp2f_u35) */ +/* DALIAS_vf_vf(exp10f) */ +/* DALIAS_vf_vf(exp10f_u35) */ +/* DALIAS_vf_vf(expm1f) */ +/* DALIAS_vf_vf(log10f) */ +/* DALIAS_vf_vf(log2f) */ +/* DALIAS_vf_vf(log2f_u35) */ +/* DALIAS_vf_vf(log1pf) */ +/* DALIAS_vf_vf(fabsf) */ +/* DALIAS_vf_vf_vf(copysignf) */ +/* DALIAS_vf_vf_vf(fmaxf) */ +/* DALIAS_vf_vf_vf(fminf) */ +/* DALIAS_vf_vf_vf(fdimf) */ +/* DALIAS_vf_vf(truncf) */ +/* DALIAS_vf_vf(floorf) */ +/* DALIAS_vf_vf(ceilf) */ +/* DALIAS_vf_vf(roundf) */ +/* DALIAS_vf_vf(rintf) */ +/* DALIAS_vf_vf_vf_vf(fmaf) */ +/* DALIAS_vf_vf_vf(hypotf_u05) */ +/* DALIAS_vf_vf_vf(hypotf_u35) */ +/* DALIAS_vf_vf_vf(nextafterf) */ +/* DALIAS_vf_vf(frfrexpf) */ +/* DALIAS_vf_vf_vf(fmodf) */ +/* DALIAS_vf_vf_vf(remainderf) */ +/* DALIAS_vf_vf(sinpif_u05) */ +/* DALIAS_vf_vf(cospif_u05) */ +/* DALIAS_vf_vf(tgammaf_u1) */ +/* DALIAS_vf_vf(lgammaf_u1) */ +/* DALIAS_vf_vf(erff_u1) */ +/* DALIAS_vf_vf(erfcf_u15) */ +/* DALIAS_vf_vf_vf(fastpowf_u3500) */ +#endif // #if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) + +#if !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) +EXPORT CONST int xgetIntf(int name) { + if (1 <= name && name <= 10) return vavailability_i(name); + return 0; +} + +EXPORT CONST void *xgetPtrf(int name) { + if (name == 0) return ISANAME; + return (void *)0; +} +#endif + +#if defined(ALIAS_NO_EXT_SUFFIX) && !defined(DETERMINISTIC) +#include ALIAS_NO_EXT_SUFFIX +#endif + +#ifdef ENABLE_GNUABI +EXPORT CONST VECTOR_CC vfloat __acosf_finite (vfloat) __attribute__((weak, alias(str_xacosf_u1 ))); +EXPORT CONST VECTOR_CC vfloat __acoshf_finite (vfloat) __attribute__((weak, alias(str_xacoshf ))); +EXPORT CONST VECTOR_CC vfloat __asinf_finite (vfloat) __attribute__((weak, alias(str_xasinf_u1 ))); +EXPORT CONST VECTOR_CC vfloat __atan2f_finite (vfloat, vfloat) __attribute__((weak, alias(str_xatan2f_u1 ))); +EXPORT CONST VECTOR_CC vfloat __atanhf_finite (vfloat) __attribute__((weak, alias(str_xatanhf ))); +EXPORT CONST VECTOR_CC vfloat __coshf_finite (vfloat) __attribute__((weak, alias(str_xcoshf ))); +EXPORT CONST VECTOR_CC vfloat __exp10f_finite (vfloat) __attribute__((weak, alias(str_xexp10f ))); +EXPORT CONST VECTOR_CC vfloat __exp2f_finite (vfloat) __attribute__((weak, alias(str_xexp2f ))); +EXPORT CONST VECTOR_CC vfloat __expf_finite (vfloat) __attribute__((weak, alias(str_xexpf ))); +EXPORT CONST VECTOR_CC vfloat __fmodf_finite (vfloat, vfloat) __attribute__((weak, alias(str_xfmodf ))); +EXPORT CONST VECTOR_CC vfloat __remainderf_finite(vfloat, vfloat) __attribute__((weak, alias(str_xremainderf))); +EXPORT CONST VECTOR_CC vfloat __modff_finite (vfloat, vfloat *) __attribute__((weak, alias(str_xmodff ))); +EXPORT CONST VECTOR_CC vfloat __hypotf_u05_finite(vfloat, vfloat) __attribute__((weak, 
alias(str_xhypotf_u05))); +EXPORT CONST VECTOR_CC vfloat __lgammaf_u1_finite(vfloat) __attribute__((weak, alias(str_xlgammaf_u1))); +EXPORT CONST VECTOR_CC vfloat __log10f_finite (vfloat) __attribute__((weak, alias(str_xlog10f ))); +EXPORT CONST VECTOR_CC vfloat __logf_finite (vfloat) __attribute__((weak, alias(str_xlogf_u1 ))); +EXPORT CONST VECTOR_CC vfloat __powf_finite (vfloat, vfloat) __attribute__((weak, alias(str_xpowf ))); +EXPORT CONST VECTOR_CC vfloat __sinhf_finite (vfloat) __attribute__((weak, alias(str_xsinhf ))); +EXPORT CONST VECTOR_CC vfloat __sqrtf_finite (vfloat) __attribute__((weak, alias(str_xsqrtf ))); +EXPORT CONST VECTOR_CC vfloat __tgammaf_u1_finite(vfloat) __attribute__((weak, alias(str_xtgammaf_u1))); + +#ifdef HEADER_MASKED +#include HEADER_MASKED +#endif +#endif /* #ifdef ENABLE_GNUABI */ + +#ifdef ENABLE_MAIN +// gcc -DENABLE_MAIN -Wno-attributes -I../common -I../arch -DENABLE_AVX2 -mavx2 -mfma sleefsimdsp.c rempitab.c ../common/common.c -lm +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +int main(int argc, char **argv) { + vfloat vf1 = vcast_vf_f(atof(argv[1])); + //vfloat vf2 = vcast_vf_f(atof(argv[2])); + + //vfloat r = xpowf(vf1, vf2); + //vfloat r = xsqrtf_u05(vf1); + //printf("%g\n", xnextafterf(vf1, vf2)[0]); + //printf("%g\n", nextafterf(atof(argv[1]), atof(argv[2]))); + printf("t = %.20g\n", xlogf_u1(vf1)[0]); + printf("c = %.20g\n", logf(atof(argv[1]))); + +} +#endif diff --git a/src/sleefsimdsp_emulation.c b/src/sleefsimdsp_emulation.c new file mode 100644 index 00000000..1e87a886 --- /dev/null +++ b/src/sleefsimdsp_emulation.c @@ -0,0 +1,788 @@ +/* + +Copyright (c) 2021 Agenium Scale + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.
+ +*/ + +#include + +#ifdef ENABLE_VSX +#include "renamevsx.h" +#define nsimd_vec_f32 nsimd_vmx_vf32 +#define get0(a) vec_extract(a, 0) +#define get1(a) vec_extract(a, 1) +#define get2(a) vec_extract(a, 2) +#define get3(a) vec_extract(a, 3) +#define set0(a, b) vec_splats(b) +#define set1(a, b) vec_insert(b, a, 1) +#define set2(a, b) vec_insert(b, a, 2) +#define set3(a, b) vec_insert(b, a, 3) +#endif + +nsimd_vec_f32 xsinf(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_sin_u35_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + + +nsimd_vec_f32 xcosf(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_cos_u35_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xtanf(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_tan_u35_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xasinf(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_asin_u35_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xacosf(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_acos_u35_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xatanf(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_atan_u35_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xatan2f(nsimd_vec_f32 a0_, nsimd_vec_f32 a1_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, a1, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + a1.v0 = get0(a1_); + a1.v1 = get1(a1_); + a1.v2 = get2(a1_); + a1.v3 = get3(a1_); + ret = nsimd_atan2_u35_cpu_f32(a0, a1); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xlogf(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_log_u35_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xcbrtf(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_cbrt_u35_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, 
ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xsinf_u1(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_sin_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xcosf_u1(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_cos_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xtanf_u1(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_tan_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xasinf_u1(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_asin_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xacosf_u1(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_acos_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xatanf_u1(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_atan_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xatan2f_u1(nsimd_vec_f32 a0_, nsimd_vec_f32 a1_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, a1, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + a1.v0 = get0(a1_); + a1.v1 = get1(a1_); + a1.v2 = get2(a1_); + a1.v3 = get3(a1_); + ret = nsimd_atan2_u10_cpu_f32(a0, a1); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xlogf_u1(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_log_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xcbrtf_u1(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_cbrt_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xexpf(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_exp_u10_cpu_f32(a0); + 
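/* Every wrapper in this file follows the same pattern: the four f32 lanes are copied into an nsimd_cpu_vf32, the scalar (cpu) nsimd kernel computes all four results in one call, and the lanes are then written back into the platform vector. */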
ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xpowf(nsimd_vec_f32 a0_, nsimd_vec_f32 a1_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, a1, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + a1.v0 = get0(a1_); + a1.v1 = get1(a1_); + a1.v2 = get2(a1_); + a1.v3 = get3(a1_); + ret = nsimd_pow_u10_cpu_f32(a0, a1); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xsinhf(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_sinh_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xcoshf(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_cosh_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xtanhf(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_tanh_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xsinhf_u35(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_sinh_u35_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xcoshf_u35(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_cosh_u35_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xtanhf_u35(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_tanh_u35_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xasinhf(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_asinh_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xacoshf(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_acosh_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xatanhf(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = 
get3(a0_); + ret = nsimd_atanh_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xexp2f(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_exp2_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xexp2f_u35(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_exp2_u35_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xexp10f(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_exp10_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xexp10f_u35(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_exp10_u35_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xexpm1f(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_expm1_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xlog10f(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_log10_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xlog2f(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_log2_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xlog2f_u35(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_log2_u35_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xlog1pf(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_log1p_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xsinpif_u05(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_sinpi_u05_cpu_f32(a0); + ret_ = 
set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xcospif_u05(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_cospi_u05_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xhypotf_u05(nsimd_vec_f32 a0_, nsimd_vec_f32 a1_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, a1, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + a1.v0 = get0(a1_); + a1.v1 = get1(a1_); + a1.v2 = get2(a1_); + a1.v3 = get3(a1_); + ret = nsimd_hypot_u05_cpu_f32(a0, a1); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xhypotf_u35(nsimd_vec_f32 a0_, nsimd_vec_f32 a1_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, a1, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + a1.v0 = get0(a1_); + a1.v1 = get1(a1_); + a1.v2 = get2(a1_); + a1.v3 = get3(a1_); + ret = nsimd_hypot_u35_cpu_f32(a0, a1); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xfmodf(nsimd_vec_f32 a0_, nsimd_vec_f32 a1_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, a1, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + a1.v0 = get0(a1_); + a1.v1 = get1(a1_); + a1.v2 = get2(a1_); + a1.v3 = get3(a1_); + ret = nsimd_fmod_cpu_f32(a0, a1); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xremainderf(nsimd_vec_f32 a0_, nsimd_vec_f32 a1_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, a1, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + a1.v0 = get0(a1_); + a1.v1 = get1(a1_); + a1.v2 = get2(a1_); + a1.v3 = get3(a1_); + ret = nsimd_remainder_cpu_f32(a0, a1); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xlgammaf_u1(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_lgamma_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xtgammaf_u1(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_tgamma_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xerff_u1(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = get3(a0_); + ret = nsimd_erf_u10_cpu_f32(a0); + ret_ = set0(ret_, ret.v0); + ret_ = set1(ret_, ret.v1); + ret_ = set2(ret_, ret.v2); + ret_ = set3(ret_, ret.v3); + return ret_; +} + +nsimd_vec_f32 xerfcf_u15(nsimd_vec_f32 a0_) { + nsimd_vec_f32 ret_; + nsimd_cpu_vf32 a0, ret; + a0.v0 = get0(a0_); + a0.v1 = get1(a0_); + a0.v2 = get2(a0_); + a0.v3 = 
get3(a0_);
+  ret = nsimd_erfc_u15_cpu_f32(a0);
+  ret_ = set0(ret_, ret.v0);
+  ret_ = set1(ret_, ret.v1);
+  ret_ = set2(ret_, ret.v2);
+  ret_ = set3(ret_, ret.v3);
+  return ret_;
+}
+
diff --git a/src/sleefsp.c b/src/sleefsp.c
new file mode 100644
index 00000000..c90fd46c
--- /dev/null
+++ b/src/sleefsp.c
@@ -0,0 +1,2411 @@
+// Copyright Naoki Shibata and contributors 2010 - 2020.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+// Always use -ffp-contract=off option to compile SLEEF.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <limits.h>
+#include <float.h>
+
+#ifndef ENABLE_BUILTIN_MATH
+#include <math.h>
+#define SQRTF sqrtf
+#else
+#define SQRTF __builtin_sqrtf
+#endif
+
+#include "misc.h"
+
+extern const float Sleef_rempitabsp[];
+
+#ifdef DORENAME
+#include "rename.h"
+#endif
+
+#if (defined(_MSC_VER))
+#pragma fp_contract (off)
+#endif
+
+#define MLA mlaf
+#define C2V(x) (x)
+#include "estrin.h"
+
+static INLINE CONST int32_t floatToRawIntBits(float d) {
+  union {
+    float f;
+    int32_t i;
+  } tmp;
+  tmp.f = d;
+  return tmp.i;
+}
+
+static INLINE CONST float intBitsToFloat(int32_t i) {
+  union {
+    float f;
+    int32_t i;
+  } tmp;
+  tmp.i = i;
+  return tmp.f;
+}
+
+static INLINE CONST float fabsfk(float x) {
+  return intBitsToFloat(0x7fffffffL & floatToRawIntBits(x));
+}
+
+static INLINE CONST float mulsignf(float x, float y) {
+  return intBitsToFloat(floatToRawIntBits(x) ^ (floatToRawIntBits(y) & (1 << 31)));
+}
+
+static INLINE CONST float copysignfk(float x, float y) {
+  return intBitsToFloat((floatToRawIntBits(x) & ~(1 << 31)) ^ (floatToRawIntBits(y) & (1 << 31)));
+}
+
+static INLINE CONST float signf(float d) { return mulsignf(1, d); }
+static INLINE CONST float mlaf(float x, float y, float z) { return x * y + z; }
+static INLINE CONST float rintfk(float x) { return x < 0 ? (int)(x - 0.5f) : (int)(x + 0.5f); }
+static INLINE CONST int ceilfk(float x) { return (int)x + (x < 0 ? 0 : 1); }
+static INLINE CONST float fminfk(float x, float y) { return x < y ? x : y; }
+static INLINE CONST float fmaxfk(float x, float y) { return x > y ? x : y; }
+static INLINE CONST int xisintf(float x) { return (x == (int)x); }
+
+static INLINE CONST int xisnanf(float x) { return x != x; }
+static INLINE CONST int xisinff(float x) { return x == SLEEF_INFINITYf || x == -SLEEF_INFINITYf; }
+static INLINE CONST int xisminff(float x) { return x == -SLEEF_INFINITYf; }
+static INLINE CONST int xispinff(float x) { return x == SLEEF_INFINITYf; }
+static INLINE CONST int xisnegzerof(float x) { return floatToRawIntBits(x) == floatToRawIntBits(-0.0); }
+static INLINE CONST int xisnumberf(float x) { return !xisinff(x) && !xisnanf(x); }
+
+static INLINE CONST int ilogbkf(float d) {
+  int m = d < 5.421010862427522E-20f;
+  d = m ? 1.8446744073709552E19f * d : d;
+  int q = (floatToRawIntBits(d) >> 23) & 0xff;
+  q = m ? q - (64 + 0x7f) : q - 0x7f;
+  return q;
+}
+
+// ilogb2kf is similar to ilogbkf, but the argument has to be a
+// normalized FP value.
+static INLINE CONST int ilogb2kf(float d) {
+  return ((floatToRawIntBits(d) >> 23) & 0xff) - 0x7f;
+}
+
+EXPORT CONST int xilogbf(float d) {
+  int e = ilogbkf(fabsfk(d));
+  e = d == 0.0f ? SLEEF_FP_ILOGB0 : e;
+  e = xisnanf(d) ? SLEEF_FP_ILOGBNAN : e;
+  e = xisinff(d) ?
INT_MAX : e; + return e; +} + +static INLINE CONST float pow2if(int q) { + return intBitsToFloat(((int32_t)(q + 0x7f)) << 23); +} + +static INLINE CONST float ldexpkf(float x, int q) { + float u; + int m; + m = q >> 31; + m = (((m + q) >> 6) - m) << 4; + q = q - (m << 2); + m += 127; + m = m < 0 ? 0 : m; + m = m > 255 ? 255 : m; + u = intBitsToFloat(((int32_t)m) << 23); + x = x * u * u * u * u; + u = intBitsToFloat(((int32_t)(q + 0x7f)) << 23); + return x * u; +} + +static INLINE CONST float ldexp2kf(float d, int e) { // faster than ldexpkf, short reach + return d * pow2if(e >> 1) * pow2if(e - (e >> 1)); +} + +static INLINE CONST float ldexp3kf(float d, int e) { // very fast, no denormal + return intBitsToFloat(floatToRawIntBits(d) + (e << 23)); +} + +// + +#ifndef NDEBUG +static int checkfp(float x) { + if (xisinff(x) || xisnanf(x)) return 1; + return 0; +} +#endif + +static INLINE CONST float upperf(float d) { + return intBitsToFloat(floatToRawIntBits(d) & 0xfffff000); +} + +static INLINE CONST Sleef_float2 df(float h, float l) { + Sleef_float2 ret; + ret.x = h; ret.y = l; + return ret; +} + +static INLINE CONST Sleef_float2 dfx(double d) { + Sleef_float2 ret; + ret.x = d; ret.y = d - ret.x; + return ret; +} + +static INLINE CONST Sleef_float2 dfnormalize_f2_f2(Sleef_float2 t) { + Sleef_float2 s; + + s.x = t.x + t.y; + s.y = t.x - s.x + t.y; + + return s; +} + +static INLINE CONST Sleef_float2 dfscale_f2_f2_f(Sleef_float2 d, float s) { + Sleef_float2 r; + + r.x = d.x * s; + r.y = d.y * s; + + return r; +} + +static INLINE CONST Sleef_float2 dfneg_f2_f2(Sleef_float2 d) { + Sleef_float2 r; + + r.x = -d.x; + r.y = -d.y; + + return r; +} + +static INLINE CONST Sleef_float2 dfabs_f2_f2(Sleef_float2 x) { + return df(x.x < 0 ? -x.x : x.x, x.x < 0 ? -x.y : x.y); +} + +static INLINE CONST Sleef_float2 dfadd_f2_f_f(float x, float y) { + // |x| >= |y| + + Sleef_float2 r; + +#ifndef NDEBUG + if (!(checkfp(x) || checkfp(y) || fabsfk(x) >= fabsfk(y))) fprintf(stderr, "[dfadd_f2_f_f : %g, %g]", x, y); +#endif + + r.x = x + y; + r.y = x - r.x + y; + + return r; +} + +static INLINE CONST Sleef_float2 dfadd2_f2_f_f(float x, float y) { + Sleef_float2 r; + + r.x = x + y; + float v = r.x - x; + r.y = (x - (r.x - v)) + (y - v); + + return r; +} + +static INLINE CONST Sleef_float2 dfadd_f2_f2_f(Sleef_float2 x, float y) { + // |x| >= |y| + + Sleef_float2 r; + +#ifndef NDEBUG + if (!(checkfp(x.x) || checkfp(y) || fabsfk(x.x) >= fabsfk(y))) fprintf(stderr, "[dfadd_f2_f2_f : %g %g]", x.x, y); +#endif + + r.x = x.x + y; + r.y = x.x - r.x + y + x.y; + + return r; +} + +static INLINE CONST Sleef_float2 dfadd_f2_f_f2(float x, Sleef_float2 y) { + // |x| >= |y| + + Sleef_float2 r; + +#ifndef NDEBUG + if (!(checkfp(x) || checkfp(y.x) || fabsfk(x) >= fabsfk(y.x))) { + fprintf(stderr, "[dfadd_f2_f_f2 : %g %g]\n", x, y.x); + fflush(stderr); + } +#endif + + r.x = x + y.x; + r.y = x - r.x + y.x + y.y; + + return r; +} + +static INLINE CONST Sleef_float2 dfadd2_f2_f2_f(Sleef_float2 x, float y) { + // |x| >= |y| + + Sleef_float2 r; + + r.x = x.x + y; + float v = r.x - x.x; + r.y = (x.x - (r.x - v)) + (y - v); + r.y += x.y; + + return r; +} + +static INLINE CONST Sleef_float2 dfadd2_f2_f_f2(float x, Sleef_float2 y) { + Sleef_float2 r; + + r.x = x + y.x; + float v = r.x - x; + r.y = (x - (r.x - v)) + (y.x - v) + y.y; + + return r; +} + +static INLINE CONST Sleef_float2 dfadd_f2_f2_f2(Sleef_float2 x, Sleef_float2 y) { + // |x| >= |y| + + Sleef_float2 r; + +#ifndef NDEBUG + if (!(checkfp(x.x) || checkfp(y.x) || fabsfk(x.x) >= 
fabsfk(y.x))) fprintf(stderr, "[dfadd_f2_f2_f2 : %g %g]", x.x, y.x); +#endif + + r.x = x.x + y.x; + r.y = x.x - r.x + y.x + x.y + y.y; + + return r; +} + +static INLINE CONST Sleef_float2 dfadd2_f2_f2_f2(Sleef_float2 x, Sleef_float2 y) { + Sleef_float2 r; + + r.x = x.x + y.x; + float v = r.x - x.x; + r.y = (x.x - (r.x - v)) + (y.x - v); + r.y += x.y + y.y; + + return r; +} + +static INLINE CONST Sleef_float2 dfsub_f2_f2_f2(Sleef_float2 x, Sleef_float2 y) { + // |x| >= |y| + + Sleef_float2 r; + +#ifndef NDEBUG + if (!(checkfp(x.x) || checkfp(y.x) || fabsfk(x.x) >= fabsfk(y.x))) fprintf(stderr, "[dfsub_f2_f2_f2 : %g %g]", x.x, y.x); +#endif + + r.x = x.x - y.x; + r.y = x.x - r.x - y.x + x.y - y.y; + + return r; +} + +static INLINE CONST Sleef_float2 dfdiv_f2_f2_f2(Sleef_float2 n, Sleef_float2 d) { + float t = 1.0f / d.x; + float dh = upperf(d.x), dl = d.x - dh; + float th = upperf(t ), tl = t - th; + float nhh = upperf(n.x), nhl = n.x - nhh; + + Sleef_float2 q; + + q.x = n.x * t; + + float u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl + + q.x * (1 - dh * th - dh * tl - dl * th - dl * tl); + + q.y = t * (n.y - q.x * d.y) + u; + + return q; +} + +static INLINE CONST Sleef_float2 dfmul_f2_f_f(float x, float y) { + float xh = upperf(x), xl = x - xh; + float yh = upperf(y), yl = y - yh; + Sleef_float2 r; + + r.x = x * y; + r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl; + + return r; +} + +static INLINE CONST Sleef_float2 dfmul_f2_f2_f(Sleef_float2 x, float y) { + float xh = upperf(x.x), xl = x.x - xh; + float yh = upperf(y ), yl = y - yh; + Sleef_float2 r; + + r.x = x.x * y; + r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y; + + return r; +} + +static INLINE CONST Sleef_float2 dfmul_f2_f2_f2(Sleef_float2 x, Sleef_float2 y) { + float xh = upperf(x.x), xl = x.x - xh; + float yh = upperf(y.x), yl = y.x - yh; + Sleef_float2 r; + + r.x = x.x * y.x; + r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x; + + return r; +} + +static INLINE CONST float dfmul_f_f2_f2(Sleef_float2 x, Sleef_float2 y) { + float xh = upperf(x.x), xl = x.x - xh; + float yh = upperf(y.x), yl = y.x - yh; + + return x.y * yh + xh * y.y + xl * yl + xh * yl + xl * yh + xh * yh; +} + +static INLINE CONST Sleef_float2 dfsqu_f2_f2(Sleef_float2 x) { + float xh = upperf(x.x), xl = x.x - xh; + Sleef_float2 r; + + r.x = x.x * x.x; + r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y); + + return r; +} + +static INLINE CONST float dfsqu_f_f2(Sleef_float2 x) { + float xh = upperf(x.x), xl = x.x - xh; + + return xh * x.y + xh * x.y + xl * xl + (xh * xl + xh * xl) + xh * xh; +} + +static INLINE CONST Sleef_float2 dfrec_f2_f(float d) { + float t = 1.0f / d; + float dh = upperf(d), dl = d - dh; + float th = upperf(t), tl = t - th; + Sleef_float2 q; + + q.x = t; + q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl); + + return q; +} + +static INLINE CONST Sleef_float2 dfrec_f2_f2(Sleef_float2 d) { + float t = 1.0f / d.x; + float dh = upperf(d.x), dl = d.x - dh; + float th = upperf(t ), tl = t - th; + Sleef_float2 q; + + q.x = t; + q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl - d.y * t); + + return q; +} + +static INLINE CONST Sleef_float2 dfsqrt_f2_f2(Sleef_float2 d) { + float t = SQRTF(d.x + d.y); + return dfscale_f2_f2_f(dfmul_f2_f2_f2(dfadd2_f2_f2_f2(d, dfmul_f2_f_f(t, t)), dfrec_f2_f(t)), 0.5f); +} + +static INLINE CONST Sleef_float2 dfsqrt_f2_f(float d) { + float t = SQRTF(d); + return dfscale_f2_f2_f(dfmul_f2_f2_f2(dfadd2_f2_f_f2(d, dfmul_f2_f_f(t, t)), dfrec_f2_f(t)), 
0.5); +} + +// + +typedef struct { + float d; + int32_t i; +} fi_t; + +typedef struct { + Sleef_float2 df; + int32_t i; +} dfi_t; + +static CONST fi_t rempisubf(float x) { + fi_t ret; + float fr = x - (float)(INT64_C(1) << 10) * (int32_t)(x * (1.0f / (INT64_C(1) << 10))); + ret.i = ((7 & ((x > 0 ? 4 : 3) + (int32_t)(fr * 8))) - 3) >> 1; + fr = fr - 0.25f * (int32_t)(fr * 4 + mulsignf(0.5f, x)); + fr = fabsfk(fr) > 0.125f ? (fr - mulsignf(0.5f, x)) : fr; + fr = fabsfk(fr) > 1e+10f ? 0 : fr; + if (fabsfk(x) == 0.12499999254941940308f) { fr = x; ret.i = 0; } + ret.d = fr; + return ret; +} + +static CONST dfi_t rempif(float a) { + Sleef_float2 x, y, z; + fi_t di; + float t; + int ex = ilogb2kf(a) - 25, q = ex > (90 - 25) ? -64 : 0; + a = ldexp3kf(a, q); + if (ex < 0) ex = 0; + ex *= 4; + x = dfmul_f2_f_f(a, Sleef_rempitabsp[ex]); + di = rempisubf(x.x); + q = di.i; + x.x = di.d; + x = dfnormalize_f2_f2(x); + y = dfmul_f2_f_f(a, Sleef_rempitabsp[ex+1]); + x = dfadd2_f2_f2_f2(x, y); + di = rempisubf(x.x); + q += di.i; + x.x = di.d; + x = dfnormalize_f2_f2(x); + y = dfmul_f2_f2_f(df(Sleef_rempitabsp[ex+2], Sleef_rempitabsp[ex+3]), a); + x = dfadd2_f2_f2_f2(x, y); + x = dfnormalize_f2_f2(x); + x = dfmul_f2_f2_f2(x, df(3.1415927410125732422f*2, -8.7422776573475857731e-08f*2)); + dfi_t ret = { fabsfk(a) < 0.7f ? df(a, 0) : x, q }; + return ret; +} + +EXPORT CONST float xsinf(float d) { + int q; + float u, s, t = d; + + if (fabsfk(d) < TRIGRANGEMAX2f) { + q = (int)rintfk(d * (float)M_1_PI); + d = mlaf(q, -PI_A2f, d); + d = mlaf(q, -PI_B2f, d); + d = mlaf(q, -PI_C2f, d); + } else if (fabsfk(d) < TRIGRANGEMAXf) { + q = (int)rintfk(d * (float)M_1_PI); + d = mlaf(q, -PI_Af, d); + d = mlaf(q, -PI_Bf, d); + d = mlaf(q, -PI_Cf, d); + d = mlaf(q, -PI_Df, d); + } else { + dfi_t dfi = rempif(t); + q = ((dfi.i & 3) * 2 + (dfi.df.x > 0) + 1) >> 2; + if ((dfi.i & 1) != 0) { + dfi.df = dfadd2_f2_f2_f2(dfi.df, df(mulsignf(3.1415927410125732422f*-0.5, dfi.df.x), + mulsignf(-8.7422776573475857731e-08f*-0.5, dfi.df.x))); + } + d = dfi.df.x + dfi.df.y; + if (xisinff(t) || xisnanf(t)) d = SLEEF_NANf; + } + + s = d * d; + + if ((q & 1) != 0) d = -d; + + u = 2.6083159809786593541503e-06f; + u = mlaf(u, s, -0.0001981069071916863322258f); + u = mlaf(u, s, 0.00833307858556509017944336f); + u = mlaf(u, s, -0.166666597127914428710938f); + + u = mlaf(s, u * d, d); + + if (xisnegzerof(t)) u = -0.0f; + + return u; +} + +EXPORT CONST float xsinf_u1(float d) { + int q; + float u; + Sleef_float2 s, t, x; + + if (fabsfk(d) < TRIGRANGEMAX2f) { + q = (int)rintfk(d * (float)M_1_PI); + u = mlaf(q, -PI_A2f, d); + s = dfadd2_f2_f_f(u, q * (-PI_B2f)); + s = dfadd_f2_f2_f(s, q * (-PI_C2f)); + } else { + dfi_t dfi = rempif(d); + q = ((dfi.i & 3) * 2 + (dfi.df.x > 0) + 1) >> 2; + if ((dfi.i & 1) != 0) { + dfi.df = dfadd2_f2_f2_f2(dfi.df, df(mulsignf(3.1415927410125732422f*-0.5, dfi.df.x), + mulsignf(-8.7422776573475857731e-08f*-0.5, dfi.df.x))); + } + s = dfnormalize_f2_f2(dfi.df); + if (xisinff(d) || xisnanf(d)) s.x = SLEEF_NANf; + } + + t = s; + s = dfsqu_f2_f2(s); + + u = 2.6083159809786593541503e-06f; + u = mlaf(u, s.x, -0.0001981069071916863322258f); + u = mlaf(u, s.x, 0.00833307858556509017944336f); + + x = dfadd_f2_f_f2(1, dfmul_f2_f2_f2(dfadd_f2_f_f(-0.166666597127914428710938f, u * s.x), s)); + + u = dfmul_f_f2_f2(t, x); + + if ((q & 1) != 0) u = -u; + if (xisnegzerof(d)) u = d; + + return u; +} + +EXPORT CONST float xcosf(float d) { + int q; + float u, s, t = d; + + if (fabsfk(d) < TRIGRANGEMAX2f) { + q = 1 + 2*(int)rintfk(d * 
(float)M_1_PI - 0.5f); + d = mlaf(q, -PI_A2f*0.5f, d); + d = mlaf(q, -PI_B2f*0.5f, d); + d = mlaf(q, -PI_C2f*0.5f, d); + } else if (fabsfk(d) < TRIGRANGEMAXf) { + q = 1 + 2*(int)rintfk(d * (float)M_1_PI - 0.5f); + d = mlaf(q, -PI_Af*0.5f, d); + d = mlaf(q, -PI_Bf*0.5f, d); + d = mlaf(q, -PI_Cf*0.5f, d); + d = mlaf(q, -PI_Df*0.5f, d); + } else { + dfi_t dfi = rempif(t); + q = ((dfi.i & 3) * 2 + (dfi.df.x > 0) + 7) >> 1; + if ((dfi.i & 1) == 0) { + dfi.df = dfadd2_f2_f2_f2(dfi.df, df(mulsignf(3.1415927410125732422f*-0.5, dfi.df.x > 0 ? 1 : -1), + mulsignf(-8.7422776573475857731e-08f*-0.5, dfi.df.x > 0 ? 1 : -1))); + } + d = dfi.df.x + dfi.df.y; + if (xisinff(t) || xisnanf(t)) d = SLEEF_NANf; + } + + s = d * d; + + if ((q & 2) == 0) d = -d; + + u = 2.6083159809786593541503e-06f; + u = mlaf(u, s, -0.0001981069071916863322258f); + u = mlaf(u, s, 0.00833307858556509017944336f); + u = mlaf(u, s, -0.166666597127914428710938f); + + u = mlaf(s, u * d, d); + + return u; +} + +EXPORT CONST float xcosf_u1(float d) { + float u; + Sleef_float2 s, t, x; + int q; + + if (fabsfk(d) < TRIGRANGEMAX2f) { + d = fabsfk(d); + float dq = mlaf(rintfk(d * (float)M_1_PI - 0.5f), 2, 1); + q = (int)dq; + s = dfadd2_f2_f_f (d, dq * (-PI_A2f*0.5f)); + s = dfadd2_f2_f2_f(s, dq * (-PI_B2f*0.5f)); + s = dfadd2_f2_f2_f(s, dq * (-PI_C2f*0.5f)); + } else { + dfi_t dfi = rempif(d); + q = ((dfi.i & 3) * 2 + (dfi.df.x > 0) + 7) >> 1; + if ((dfi.i & 1) == 0) { + dfi.df = dfadd2_f2_f2_f2(dfi.df, df(mulsignf(3.1415927410125732422f*-0.5, dfi.df.x > 0 ? 1 : -1), + mulsignf(-8.7422776573475857731e-08f*-0.5, dfi.df.x > 0 ? 1 : -1))); + } + s = dfnormalize_f2_f2(dfi.df); + if (xisinff(d) || xisnanf(d)) s.x = SLEEF_NANf; + } + + t = s; + s = dfsqu_f2_f2(s); + + u = 2.6083159809786593541503e-06f; + u = mlaf(u, s.x, -0.0001981069071916863322258f); + u = mlaf(u, s.x, 0.00833307858556509017944336f); + + x = dfadd_f2_f_f2(1, dfmul_f2_f2_f2(dfadd_f2_f_f(-0.166666597127914428710938f, u * s.x), s)); + + u = dfmul_f_f2_f2(t, x); + + if ((((int)q) & 2) == 0) u = -u; + + return u; +} + +EXPORT CONST float xfastsinf_u3500(float d) { + int q; + float u, s, t = d; + + q = rintfk(d * (float)M_1_PI); + d = mlaf(q, -(float)M_PI, d); + + s = d * d; + + u = -0.1881748176e-3; + u = mlaf(u, s, +0.8323502727e-2); + u = mlaf(u, s, -0.1666651368e+0); + u = mlaf(s * d, u, d); + + if ((q & 1) != 0) u = -u; + + if (UNLIKELY(fabsfk(t) > 30.0f)) return xsinf(t); + + return u; +} + +EXPORT CONST float xfastcosf_u3500(float d) { + int q; + float u, s, t = d; + + q = rintfk(mlaf(d, (float)M_1_PI, -0.5f)); + d = mlaf(q, -(float)M_PI, d - (float)M_PI*0.5f); + + s = d * d; + + u = -0.1881748176e-3; + u = mlaf(u, s, +0.8323502727e-2); + u = mlaf(u, s, -0.1666651368e+0); + u = mlaf(s * d, u, d); + + if ((q & 1) == 0) u = -u; + + if (UNLIKELY(fabsfk(t) > 30.0f)) return xcosf(t); + + return u; +} + +EXPORT CONST Sleef_float2 xsincosf(float d) { + int q; + float u, s, t; + Sleef_float2 r; + + s = d; + + if (fabsfk(d) < TRIGRANGEMAX2f) { + q = (int)rintfk(d * ((float)(2 * M_1_PI))); + s = mlaf(q, -PI_A2f*0.5f, s); + s = mlaf(q, -PI_B2f*0.5f, s); + s = mlaf(q, -PI_C2f*0.5f, s); + } else if (fabsfk(d) < TRIGRANGEMAXf) { + q = (int)rintfk(d * ((float)(2 * M_1_PI))); + s = mlaf(q, -PI_Af*0.5f, s); + s = mlaf(q, -PI_Bf*0.5f, s); + s = mlaf(q, -PI_Cf*0.5f, s); + s = mlaf(q, -PI_Df*0.5f, s); + } else { + dfi_t dfi = rempif(d); + q = dfi.i; + s = dfi.df.x + dfi.df.y; + if (xisinff(d) || xisnanf(d)) s = SLEEF_NANf; + } + + t = s; + + s = s * s; + + u = -0.000195169282960705459117889f; + 
u = mlaf(u, s, 0.00833215750753879547119141f); + u = mlaf(u, s, -0.166666537523269653320312f); + u = u * s * t; + + r.x = t + u; + + if (xisnegzerof(d)) r.x = -0.0f; + + u = -2.71811842367242206819355e-07f; + u = mlaf(u, s, 2.47990446951007470488548e-05f); + u = mlaf(u, s, -0.00138888787478208541870117f); + u = mlaf(u, s, 0.0416666641831398010253906f); + u = mlaf(u, s, -0.5f); + + r.y = u * s + 1; + + if ((q & 1) != 0) { s = r.y; r.y = r.x; r.x = s; } + if ((q & 2) != 0) { r.x = -r.x; } + if (((q+1) & 2) != 0) { r.y = -r.y; } + + return r; +} + +EXPORT CONST Sleef_float2 xsincosf_u1(float d) { + int q; + float u; + Sleef_float2 r, s, t, x; + + if (fabsfk(d) < TRIGRANGEMAX2f) { + q = (int)rintfk(d * (float)(2 * M_1_PI)); + u = mlaf(q, -PI_A2f*0.5f, d); + s = dfadd2_f2_f_f(u, q * (-PI_B2f*0.5f)); + s = dfadd_f2_f2_f(s, q * (-PI_C2f*0.5f)); + } else { + dfi_t dfi = rempif(d); + q = dfi.i; + s = dfi.df; + if (xisinff(d) || xisnanf(d)) s.x = SLEEF_NANf; + } + + t = s; + s.x = dfsqu_f_f2(s); + + u = -0.000195169282960705459117889f; + u = mlaf(u, s.x, 0.00833215750753879547119141f); + u = mlaf(u, s.x, -0.166666537523269653320312f); + + u *= s.x * t.x; + + x = dfadd_f2_f2_f(t, u); + r.x = x.x + x.y; + if (xisnegzerof(d)) r.x = -0.0f; + + u = -2.71811842367242206819355e-07f; + u = mlaf(u, s.x, 2.47990446951007470488548e-05f); + u = mlaf(u, s.x, -0.00138888787478208541870117f); + u = mlaf(u, s.x, 0.0416666641831398010253906f); + u = mlaf(u, s.x, -0.5f); + + x = dfadd_f2_f_f2(1, dfmul_f2_f_f(s.x, u)); + r.y = x.x + x.y; + + if ((q & 1) != 0) { u = r.y; r.y = r.x; r.x = u; } + if ((q & 2) != 0) { r.x = -r.x; } + if (((q+1) & 2) != 0) { r.y = -r.y; } + + return r; +} + +EXPORT CONST Sleef_float2 xsincospif_u05(float d) { + float u, s, t; + Sleef_float2 r, x, s2; + + u = d * 4; + int q = ceilfk(u) & ~(int)1; + + s = u - (float)q; + t = s; + s = s * s; + s2 = dfmul_f2_f_f(t, t); + + // + + u = +0.3093842054e-6; + u = mlaf(u, s, -0.3657307388e-4); + u = mlaf(u, s, +0.2490393585e-2); + x = dfadd2_f2_f_f2(u * s, df(-0.080745510756969451904, -1.3373665339076936258e-09)); + x = dfadd2_f2_f2_f2(dfmul_f2_f2_f2(s2, x), df(0.78539818525314331055, -2.1857338617566484855e-08)); + + x = dfmul_f2_f2_f(x, t); + r.x = x.x + x.y; + if (xisnegzerof(d)) r.x = -0.0f; + + u = -0.2430611801e-7; + u = mlaf(u, s, +0.3590577080e-5); + u = mlaf(u, s, -0.3259917721e-3); + x = dfadd2_f2_f_f2(u * s, df(0.015854343771934509277, 4.4940051354032242811e-10)); + x = dfadd2_f2_f2_f2(dfmul_f2_f2_f2(s2, x), df(-0.30842512845993041992, -9.0728339030733922277e-09)); + + x = dfadd2_f2_f2_f(dfmul_f2_f2_f2(x, s2), 1); + r.y = x.x + x.y; + + if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } + if ((q & 4) != 0) { r.x = -r.x; } + if (((q+2) & 4) != 0) { r.y = -r.y; } + + if (fabsfk(d) > 1e+7f) { r.x = 0; r.y = 1; } + if (xisinff(d)) { r.x = r.y = SLEEF_NANf; } + + return r; +} + +EXPORT CONST Sleef_float2 xsincospif_u35(float d) { + float u, s, t; + Sleef_float2 r; + + u = d * 4; + int q = ceilfk(u) & ~(int)1; + + s = u - (float)q; + t = s; + s = s * s; + + // + + u = -0.3600925265e-4; + u = mlaf(u, s, +0.2490088111e-2); + u = mlaf(u, s, -0.8074551076e-1); + u = mlaf(u, s, +0.7853981853e+0); + + r.x = u * t; + + u = +0.3539815225e-5; + u = mlaf(u, s, -0.3259574005e-3); + u = mlaf(u, s, +0.1585431583e-1); + u = mlaf(u, s, -0.3084251285e+0); + u = mlaf(u, s, 1); + + r.y = u; + + if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } + if ((q & 4) != 0) { r.x = -r.x; } + if (((q+2) & 4) != 0) { r.y = -r.y; } + + if (fabsfk(d) > 1e+7f) { r.x = 0; r.y = 
1; } + if (xisinff(d)) { r.x = r.y = SLEEF_NANf; } + + return r; +} + +EXPORT CONST float xtanf(float d) { + int q; + float u, s, x; + + x = d; + + if (fabsfk(d) < TRIGRANGEMAX2f*0.5f) { + q = (int)rintfk(d * (float)(2 * M_1_PI)); + x = mlaf(q, -PI_A2f*0.5f, x); + x = mlaf(q, -PI_B2f*0.5f, x); + x = mlaf(q, -PI_C2f*0.5f, x); + } else if (fabsfk(d) < TRIGRANGEMAXf) { + q = (int)rintfk(d * (float)(2 * M_1_PI)); + x = mlaf(q, -PI_Af*0.5f, x); + x = mlaf(q, -PI_Bf*0.5f, x); + x = mlaf(q, -PI_Cf*0.5f, x); + x = mlaf(q, -PI_Df*0.5f, x); + } else { + dfi_t dfi = rempif(d); + q = dfi.i; + x = dfi.df.x + dfi.df.y; + if (xisinff(d) || xisnanf(d)) x = SLEEF_NANf; + } + + s = x * x; + + if ((q & 1) != 0) x = -x; + + float s2 = s * s, s4 = s2 * s2; + u = POLY6(s, s2, s4, + 0.00927245803177356719970703f, + 0.00331984995864331722259521f, + 0.0242998078465461730957031f, + 0.0534495301544666290283203f, + 0.133383005857467651367188f, + 0.333331853151321411132812f); + + u = mlaf(s, u * x, x); + + if ((q & 1) != 0) u = 1.0f / u; + + return u; +} + +EXPORT CONST float xtanf_u1(float d) { + int q; + float u; + Sleef_float2 s, t, x; + + if (fabsfk(d) < TRIGRANGEMAX2f) { + q = (int)rintfk(d * (float)(2 * M_1_PI)); + u = mlaf(q, -PI_A2f*0.5f, d); + s = dfadd2_f2_f_f(u, q * (-PI_B2f*0.5f)); + s = dfadd_f2_f2_f(s, q * (-PI_C2f*0.5f)); + } else { + dfi_t dfi = rempif(d); + q = dfi.i; + s = dfi.df; + if (xisinff(d) || xisnanf(d)) s.x = SLEEF_NANf; + } + + if ((q & 1) != 0) s = dfneg_f2_f2(s); + + t = s; + s = dfsqu_f2_f2(s); + s = dfnormalize_f2_f2(s); + + u = 0.00446636462584137916564941f; + u = mlaf(u, s.x, -8.3920182078145444393158e-05f); + u = mlaf(u, s.x, 0.0109639242291450500488281f); + u = mlaf(u, s.x, 0.0212360303848981857299805f); + u = mlaf(u, s.x, 0.0540687143802642822265625f); + + x = dfadd_f2_f_f(0.133325666189193725585938f, u * s.x); + x = dfadd_f2_f_f2(1, dfmul_f2_f2_f2(dfadd_f2_f_f2(0.33333361148834228515625f, dfmul_f2_f2_f2(s, x)), s)); + x = dfmul_f2_f2_f2(t, x); + + if ((q & 1) != 0) x = dfrec_f2_f2(x); + + u = x.x + x.y; + + if (xisnegzerof(d)) u = -0.0f; + + return u; +} + +EXPORT CONST float xatanf(float s) { + float t, u; + int q = 0; + + if (signf(s) == -1) { s = -s; q = 2; } + if (s > 1) { s = 1.0f / s; q |= 1; } + + t = s * s; + + float t2 = t * t, t4 = t2 * t2; + u = POLY8(t, t2, t4, + 0.00282363896258175373077393f, + -0.0159569028764963150024414f, + 0.0425049886107444763183594f, + -0.0748900920152664184570312f, + 0.106347933411598205566406f, + -0.142027363181114196777344f, + 0.199926957488059997558594f, + -0.333331018686294555664062f); + + t = s + s * (t * u); + + if ((q & 1) != 0) t = 1.570796326794896557998982f - t; + if ((q & 2) != 0) t = -t; + + return t; +} + +static INLINE CONST float atan2kf(float y, float x) { + float s, t, u; + int q = 0; + + if (x < 0) { x = -x; q = -2; } + if (y > x) { t = x; x = y; y = -t; q += 1; } + + s = y / x; + t = s * s; + + float t2 = t * t, t4 = t2 * t2; + u = POLY8(t, t2, t4, + 0.00282363896258175373077393f, + -0.0159569028764963150024414f, + 0.0425049886107444763183594f, + -0.0748900920152664184570312f, + 0.106347933411598205566406f, + -0.142027363181114196777344f, + 0.199926957488059997558594f, + -0.333331018686294555664062f); + + t = u * t * s + s; + t = q * (float)(M_PI/2) + t; + + return t; +} + +EXPORT CONST float xatan2f(float y, float x) { + float r = atan2kf(fabsfk(y), x); + + r = mulsignf(r, x); + if (xisinff(x) || x == 0) r = M_PIf/2 - (xisinff(x) ? (signf(x) * (float)(M_PI /2)) : 0); + if (xisinff(y) ) r = M_PIf/2 - (xisinff(x) ? 
(signf(x) * (float)(M_PI*1/4)) : 0); + if ( y == 0) r = (signf(x) == -1 ? M_PIf : 0); + + return xisnanf(x) || xisnanf(y) ? SLEEF_NANf : mulsignf(r, y); +} + +EXPORT CONST float xasinf(float d) { + int o = fabsfk(d) < 0.5f; + float x2 = o ? (d*d) : ((1-fabsfk(d))*0.5f), x = o ? fabsfk(d) : SQRTF(x2), u; + + u = +0.4197454825e-1; + u = mlaf(u, x2, +0.2424046025e-1); + u = mlaf(u, x2, +0.4547423869e-1); + u = mlaf(u, x2, +0.7495029271e-1); + u = mlaf(u, x2, +0.1666677296e+0); + u = mlaf(u, x * x2, x); + + float r = o ? u : (M_PIf/2 - 2*u); + r = mulsignf(r, d); + + return r; +} + +EXPORT CONST float xacosf(float d) { + int o = fabsfk(d) < 0.5f; + float x2 = o ? (d*d) : ((1-fabsfk(d))*0.5f), u; + float x = o ? fabsfk(d) : SQRTF(x2); + x = fabsfk(d) == 1.0 ? 0 : x; + + u = +0.4197454825e-1; + u = mlaf(u, x2, +0.2424046025e-1); + u = mlaf(u, x2, +0.4547423869e-1); + u = mlaf(u, x2, +0.7495029271e-1); + u = mlaf(u, x2, +0.1666677296e+0); + + u *= x * x2; + + float y = 3.1415926535897932f/2 - (mulsignf(x, d) + mulsignf(u, d)); + x += u; + float r = o ? y : (x*2); + if (!o && d < 0) r = dfadd_f2_f2_f(df(3.1415927410125732422f,-8.7422776573475857731e-08f), -r).x; + + return r; +} + +static Sleef_float2 atan2kf_u1(Sleef_float2 y, Sleef_float2 x) { + float u; + Sleef_float2 s, t; + int q = 0; + + if (x.x < 0) { x.x = -x.x; x.y = -x.y; q = -2; } + if (y.x > x.x) { t = x; x = y; y.x = -t.x; y.y = -t.y; q += 1; } + + s = dfdiv_f2_f2_f2(y, x); + t = dfsqu_f2_f2(s); + t = dfnormalize_f2_f2(t); + + u = -0.00176397908944636583328247f; + u = mlaf(u, t.x, 0.0107900900766253471374512f); + u = mlaf(u, t.x, -0.0309564601629972457885742f); + u = mlaf(u, t.x, 0.0577365085482597351074219f); + u = mlaf(u, t.x, -0.0838950723409652709960938f); + u = mlaf(u, t.x, 0.109463557600975036621094f); + u = mlaf(u, t.x, -0.142626821994781494140625f); + u = mlaf(u, t.x, 0.199983194470405578613281f); + + t = dfmul_f2_f2_f2(t, dfadd_f2_f_f(-0.333332866430282592773438f, u * t.x)); + t = dfmul_f2_f2_f2(s, dfadd_f2_f_f2(1, t)); + t = dfadd2_f2_f2_f2(dfmul_f2_f2_f(df(1.5707963705062866211f, -4.3711388286737928865e-08f), q), t); + + return t; +} + +EXPORT CONST float xatan2f_u1(float y, float x) { + if (fabsfk(x) < 2.9387372783541830947e-39f) { y *= (UINT64_C(1) << 24); x *= (UINT64_C(1) << 24); } // nexttowardf((1.0 / FLT_MAX), 1) + Sleef_float2 d = atan2kf_u1(df(fabsfk(y), 0), df(x, 0)); + float r = d.x + d.y; + + r = mulsignf(r, x); + if (xisinff(x) || x == 0) r = (float)M_PI/2 - (xisinff(x) ? (signf(x) * (float)(M_PI /2)) : 0.0f); + if (xisinff(y) ) r = (float)M_PI/2 - (xisinff(x) ? (signf(x) * (float)(M_PI*1/4)) : 0.0f); + if ( y == 0) r = (signf(x) == -1 ? (float)M_PI : 0.0f); + + return xisnanf(x) || xisnanf(y) ? SLEEF_NANf : mulsignf(r, y); +} + +EXPORT CONST float xasinf_u1(float d) { + int o = fabsfk(d) < 0.5f; + float x2 = o ? (d*d) : ((1-fabsfk(d))*0.5f), u; + Sleef_float2 x = o ? df(fabsfk(d), 0) : dfsqrt_f2_f(x2); + x = fabsfk(d) == 1.0f ? df(0, 0) : x; + + u = +0.4197454825e-1; + u = mlaf(u, x2, +0.2424046025e-1); + u = mlaf(u, x2, +0.4547423869e-1); + u = mlaf(u, x2, +0.7495029271e-1); + u = mlaf(u, x2, +0.1666677296e+0); + u *= x2 * x.x; + + Sleef_float2 y = dfadd_f2_f2_f(dfsub_f2_f2_f2(df(3.1415927410125732422f/4,-8.7422776573475857731e-08f/4), x), -u); + float r = o ? (u + x.x) : ((y.x + y.y)*2); + r = mulsignf(r, d); + + return r; +} + +EXPORT CONST float xacosf_u1(float d) { + int o = fabsfk(d) < 0.5f; + float x2 = o ? (d*d) : ((1-fabsfk(d))*0.5f), u; + Sleef_float2 x = o ? 
df(fabsfk(d), 0) : dfsqrt_f2_f(x2); + x = fabsfk(d) == 1.0 ? df(0, 0) : x; + + u = +0.4197454825e-1; + u = mlaf(u, x2, +0.2424046025e-1); + u = mlaf(u, x2, +0.4547423869e-1); + u = mlaf(u, x2, +0.7495029271e-1); + u = mlaf(u, x2, +0.1666677296e+0); + + u = u * x.x * x2; + + Sleef_float2 y = dfsub_f2_f2_f2(df(3.1415927410125732422f/2,-8.7422776573475857731e-08f/2), + dfadd_f2_f_f(mulsignf(x.x, d), mulsignf(u, d))); + x = dfadd_f2_f2_f(x, u); + y = o ? y : dfscale_f2_f2_f(x, 2); + if (!o && d < 0) y = dfsub_f2_f2_f2(df(3.1415927410125732422f,-8.7422776573475857731e-08f), y); + + return y.x + y.y; +} + +EXPORT CONST float xatanf_u1(float d) { + Sleef_float2 d2 = atan2kf_u1(df(fabsfk(d), 0.0f), df(1.0f, 0.0f)); + float r = d2.x + d2.y; + if (xisinff(d)) r = 1.570796326794896557998982f; + return mulsignf(r, d); +} + +EXPORT CONST float xlogf(float d) { + float x, x2, t, m; + int e; + + int o = d < FLT_MIN; + if (o) d *= (float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32); + + e = ilogb2kf(d * (1.0f/0.75f)); + m = ldexp3kf(d, -e); + + if (o) e -= 64; + + x = (m-1.0f) / (m+1.0f); + x2 = x * x; + + t = 0.2392828464508056640625f; + t = mlaf(t, x2, 0.28518211841583251953125f); + t = mlaf(t, x2, 0.400005877017974853515625f); + t = mlaf(t, x2, 0.666666686534881591796875f); + t = mlaf(t, x2, 2.0f); + + x = x * t + 0.693147180559945286226764f * e; + + if (xisinff(d)) x = SLEEF_INFINITYf; + if (d < 0 || xisnanf(d)) x = SLEEF_NANf; + if (d == 0) x = -SLEEF_INFINITYf; + + return x; +} + +EXPORT CONST float xexpf(float d) { + int q = (int)rintfk(d * R_LN2f); + float s, u; + + s = mlaf(q, -L2Uf, d); + s = mlaf(q, -L2Lf, s); + + u = 0.000198527617612853646278381; + u = mlaf(u, s, 0.00139304355252534151077271); + u = mlaf(u, s, 0.00833336077630519866943359); + u = mlaf(u, s, 0.0416664853692054748535156); + u = mlaf(u, s, 0.166666671633720397949219); + u = mlaf(u, s, 0.5); + + u = s * s * u + s + 1.0f; + u = ldexp2kf(u, q); + + if (d < -104) u = 0; + if (d > 104) u = SLEEF_INFINITYf; + + return u; +} + +static INLINE CONST float expkf(Sleef_float2 d) { + int q = (int)rintfk((d.x + d.y) * R_LN2f); + Sleef_float2 s, t; + float u; + + s = dfadd2_f2_f2_f(d, q * -L2Uf); + s = dfadd2_f2_f2_f(s, q * -L2Lf); + + s = dfnormalize_f2_f2(s); + + u = 0.00136324646882712841033936f; + u = mlaf(u, s.x, 0.00836596917361021041870117f); + u = mlaf(u, s.x, 0.0416710823774337768554688f); + u = mlaf(u, s.x, 0.166665524244308471679688f); + u = mlaf(u, s.x, 0.499999850988388061523438f); + + t = dfadd_f2_f2_f2(s, dfmul_f2_f2_f(dfsqu_f2_f2(s), u)); + + t = dfadd_f2_f_f2(1, t); + + u = ldexpkf(t.x + t.y, q); + + if (d.x < -104) u = 0; + + return u; +} + +static INLINE CONST float expm1kf(float d) { + int q = (int)rintfk(d * R_LN2f); + float s, u; + + s = mlaf(q, -L2Uf, d); + s = mlaf(q, -L2Lf, s); + + float s2 = s * s, s4 = s2 * s2; + u = POLY6(s, s2, s4, + 0.000198527617612853646278381, + 0.00139304355252534151077271, + 0.00833336077630519866943359, + 0.0416664853692054748535156, + 0.166666671633720397949219, + 0.5); + + u = s * s * u + s; + + if (q != 0) u = ldexp2kf(u + 1, q) - 1; + + return u; +} + +static INLINE CONST Sleef_float2 logkf(float d) { + Sleef_float2 x, x2, s; + float m, t; + int e; + + int o = d < FLT_MIN; + if (o) d *= (float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32); + + e = ilogb2kf(d * (1.0f/0.75f)); + m = ldexp3kf(d, -e); + + if (o) e -= 64; + + x = dfdiv_f2_f2_f2(dfadd2_f2_f_f(-1, m), dfadd2_f2_f_f(1, m)); + x2 = dfsqu_f2_f2(x); + + t = 0.240320354700088500976562; + t = mlaf(t, x2.x, 
0.285112679004669189453125); + t = mlaf(t, x2.x, 0.400007992982864379882812); + Sleef_float2 c = df(0.66666662693023681640625f, 3.69183861259614332084311e-09f); + + s = dfmul_f2_f2_f(df(0.69314718246459960938f, -1.904654323148236017e-09f), e); + s = dfadd_f2_f2_f2(s, dfscale_f2_f2_f(x, 2)); + s = dfadd_f2_f2_f2(s, dfmul_f2_f2_f2(dfmul_f2_f2_f2(x2, x), + dfadd2_f2_f2_f2(dfmul_f2_f2_f(x2, t), c))); + return s; +} + +EXPORT CONST float xlogf_u1(float d) { + Sleef_float2 x, s; + float m, t, x2; + int e; + + int o = d < FLT_MIN; + if (o) d *= (float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32); + + e = ilogb2kf(d * (1.0f/0.75f)); + m = ldexp3kf(d, -e); + + if (o) e -= 64; + + x = dfdiv_f2_f2_f2(dfadd2_f2_f_f(-1, m), dfadd2_f2_f_f(1, m)); + x2 = x.x * x.x; + + t = +0.3027294874e+0f; + t = mlaf(t, x2, +0.3996108174e+0f); + t = mlaf(t, x2, +0.6666694880e+0f); + + s = dfmul_f2_f2_f(df(0.69314718246459960938f, -1.904654323148236017e-09f), (float)e); + s = dfadd_f2_f2_f2(s, dfscale_f2_f2_f(x, 2)); + s = dfadd_f2_f2_f(s, x2 * x.x * t); + + float r = s.x + s.y; + + if (xisinff(d)) r = SLEEF_INFINITYf; + if (d < 0 || xisnanf(d)) r = SLEEF_NANf; + if (d == 0) r = -SLEEF_INFINITYf; + + return r; +} + +static INLINE CONST Sleef_float2 expk2f(Sleef_float2 d) { + int q = (int)rintfk((d.x + d.y) * R_LN2f); + Sleef_float2 s, t; + float u; + + s = dfadd2_f2_f2_f(d, q * -L2Uf); + s = dfadd2_f2_f2_f(s, q * -L2Lf); + + u = +0.1980960224e-3f; + u = mlaf(u, s.x, +0.1394256484e-2f); + u = mlaf(u, s.x, +0.8333456703e-2f); + u = mlaf(u, s.x, +0.4166637361e-1f); + + t = dfadd2_f2_f2_f(dfmul_f2_f2_f(s, u), +0.166666659414234244790680580464e+0f); + t = dfadd2_f2_f2_f(dfmul_f2_f2_f2(s, t), 0.5); + t = dfadd2_f2_f2_f2(s, dfmul_f2_f2_f2(dfsqu_f2_f2(s), t)); + + t = dfadd2_f2_f_f2(1, t); + + t.x = ldexp2kf(t.x, q); + t.y = ldexp2kf(t.y, q); + + return d.x < -104 ? df(0, 0) : t; +} + +EXPORT CONST float xpowf(float x, float y) { + int yisint = (y == (int)y) || (fabsfk(y) >= (float)(INT64_C(1) << 24)); + int yisodd = (1 & (int)y) != 0 && yisint && fabsfk(y) < (float)(INT64_C(1) << 24); + + float result = expkf(dfmul_f2_f2_f(logkf(fabsfk(x)), y)); + + result = xisnanf(result) ? SLEEF_INFINITYf : result; + result *= (x >= 0 ? 1 : (!yisint ? SLEEF_NANf : (yisodd ? -1 : 1))); + + float efx = mulsignf(fabsfk(x) - 1, y); + if (xisinff(y)) result = efx < 0 ? 0.0f : (efx == 0 ? 1.0f : SLEEF_INFINITYf); + if (xisinff(x) || x == 0) result = (yisodd ? signf(x) : 1) * ((x == 0 ? -y : y) < 0 ? 
0 : SLEEF_INFINITYf); + if (xisnanf(x) || xisnanf(y)) result = SLEEF_NANf; + if (y == 0 || x == 1) result = 1; + + return result; +} + +static INLINE CONST float logk3f(float d) { + float x, x2, t, m; + int e; + + int o = d < FLT_MIN; + if (o) d *= (float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32); + + e = ilogb2kf(d * (1.0f/0.75f)); + m = ldexp3kf(d, -e); + + if (o) e -= 64; + + x = (m-1) / (m+1); + x2 = x * x; + + t = 0.2392828464508056640625f; + t = mlaf(t, x2, 0.28518211841583251953125f); + t = mlaf(t, x2, 0.400005877017974853515625f); + t = mlaf(t, x2, 0.666666686534881591796875f); + t = mlaf(t, x2, 2.0f); + + x = mlaf(x, t, 0.693147180559945286226764f * e); + + return x; +} + +static INLINE CONST float expk3f(float d) { + int q = (int)rintfk(d * R_LN2f); + float s, u; + + s = mlaf(q, -L2Uf, d); + s = mlaf(q, -L2Lf, s); + + u = 0.000198527617612853646278381; + u = mlaf(u, s, 0.00139304355252534151077271); + u = mlaf(u, s, 0.00833336077630519866943359); + u = mlaf(u, s, 0.0416664853692054748535156); + u = mlaf(u, s, 0.166666671633720397949219); + u = mlaf(u, s, 0.5); + + u = mlaf(s * s, u, s + 1.0f); + u = ldexpkf(u, q); + + if (d < -104) u = 0; + + return u; +} + +EXPORT CONST float xfastpowf_u3500(float x, float y) { + float result = expk3f(logk3f(fabsfk(x)) * y); + + int yisint = (y == (int)y) || (fabsfk(y) >= (float)(INT64_C(1) << 24)); + int yisodd = (1 & (int)y) != 0 && yisint && fabsfk(y) < (float)(INT64_C(1) << 24); + + result *= (x < 0 && yisodd) ? -1 : 1; + if (x == 0) result = 0; + if (y == 0) result = 1; + + return result; +} + +EXPORT CONST float xsinhf(float x) { + float y = fabsfk(x); + Sleef_float2 d = expk2f(df(y, 0)); + d = dfsub_f2_f2_f2(d, dfrec_f2_f2(d)); + y = (d.x + d.y) * 0.5f; + + y = fabsfk(x) > 89 ? SLEEF_INFINITYf : y; + y = xisnanf(y) ? SLEEF_INFINITYf : y; + y = mulsignf(y, x); + y = xisnanf(x) ? SLEEF_NANf : y; + + return y; +} + +EXPORT CONST float xcoshf(float x) { + float y = fabsfk(x); + Sleef_float2 d = expk2f(df(y, 0)); + d = dfadd_f2_f2_f2(d, dfrec_f2_f2(d)); + y = (d.x + d.y) * 0.5f; + + y = fabsfk(x) > 89 ? SLEEF_INFINITYf : y; + y = xisnanf(y) ? SLEEF_INFINITYf : y; + y = xisnanf(x) ? SLEEF_NANf : y; + + return y; +} + +EXPORT CONST float xtanhf(float x) { + float y = fabsfk(x); + Sleef_float2 d = expk2f(df(y, 0)); + Sleef_float2 e = dfrec_f2_f2(d); + d = dfdiv_f2_f2_f2(dfsub_f2_f2_f2(d, e), dfadd_f2_f2_f2(d, e)); + y = d.x + d.y; + + y = fabsfk(x) > 18.714973875f ? 1.0f : y; + y = xisnanf(y) ? 1.0f : y; + y = mulsignf(y, x); + y = xisnanf(x) ? SLEEF_NANf : y; + + return y; +} + +EXPORT CONST float xsinhf_u35(float x) { + float e = expm1kf(fabsfk(x)); + float y = (e + 2) / (e + 1) * (0.5f * e); + + y = fabsfk(x) > 88 ? SLEEF_INFINITYf : y; + y = xisnanf(y) ? SLEEF_INFINITYf : y; + y = mulsignf(y, x); + y = xisnanf(x) ? SLEEF_NANf : y; + + return y; +} + +EXPORT CONST float xcoshf_u35(float x) { + float e = xexpf(fabsfk(x)); + float y = 0.5f * e + 0.5f / e; + + y = fabsfk(x) > 88 ? SLEEF_INFINITYf : y; + y = xisnanf(y) ? SLEEF_INFINITYf : y; + y = xisnanf(x) ? SLEEF_NANf : y; + + return y; +} + +EXPORT CONST float xtanhf_u35(float x) { + float y = fabsfk(x); + float d = expm1kf(2*y); + y = d / (d + 2); + + y = fabsfk(x) > 18.714973875f ? 1.0f : y; + y = xisnanf(y) ? 1.0f : y; + y = mulsignf(y, x); + y = xisnanf(x) ? 
SLEEF_NANf : y; + + return y; +} + +static INLINE CONST Sleef_float2 logk2f(Sleef_float2 d) { + Sleef_float2 x, x2, m, s; + float t; + int e; + + e = ilogbkf(d.x * (1.0f/0.75f)); + m = dfscale_f2_f2_f(d, pow2if(-e)); + + x = dfdiv_f2_f2_f2(dfadd2_f2_f2_f(m, -1), dfadd2_f2_f2_f(m, 1)); + x2 = dfsqu_f2_f2(x); + + t = 0.2392828464508056640625f; + t = mlaf(t, x2.x, 0.28518211841583251953125f); + t = mlaf(t, x2.x, 0.400005877017974853515625f); + t = mlaf(t, x2.x, 0.666666686534881591796875f); + + s = dfmul_f2_f2_f(df(0.69314718246459960938f, -1.904654323148236017e-09f), e); + s = dfadd_f2_f2_f2(s, dfscale_f2_f2_f(x, 2)); + s = dfadd_f2_f2_f2(s, dfmul_f2_f2_f(dfmul_f2_f2_f2(x2, x), t)); + + return s; +} + +EXPORT CONST float xasinhf(float x) { + float y = fabsfk(x); + Sleef_float2 d; + + d = y > 1 ? dfrec_f2_f(x) : df(y, 0); + d = dfsqrt_f2_f2(dfadd2_f2_f2_f(dfsqu_f2_f2(d), 1)); + d = y > 1 ? dfmul_f2_f2_f(d, y) : d; + + d = logk2f(dfnormalize_f2_f2(dfadd_f2_f2_f(d, x))); + y = d.x + d.y; + + y = (fabsfk(x) > SQRT_FLT_MAX || xisnanf(y)) ? mulsignf(SLEEF_INFINITYf, x) : y; + y = xisnanf(x) ? SLEEF_NANf : y; + y = xisnegzerof(x) ? -0.0f : y; + + return y; +} + +EXPORT CONST float xacoshf(float x) { + Sleef_float2 d = logk2f(dfadd2_f2_f2_f(dfmul_f2_f2_f2(dfsqrt_f2_f2(dfadd2_f2_f_f(x, 1)), dfsqrt_f2_f2(dfadd2_f2_f_f(x, -1))), x)); + float y = d.x + d.y; + + y = (x > SQRT_FLT_MAX || xisnanf(y)) ? SLEEF_INFINITYf : y; + y = x == 1.0f ? 0.0f : y; + y = x < 1.0f ? SLEEF_NANf : y; + y = xisnanf(x) ? SLEEF_NANf : y; + + return y; +} + +EXPORT CONST float xatanhf(float x) { + float y = fabsfk(x); + Sleef_float2 d = logk2f(dfdiv_f2_f2_f2(dfadd2_f2_f_f(1, y), dfadd2_f2_f_f(1, -y))); + y = y > 1.0f ? SLEEF_NANf : (y == 1.0f ? SLEEF_INFINITYf : (d.x + d.y) * 0.5f); + + y = xisinff(x) || xisnanf(y) ? SLEEF_NANf : y; + y = mulsignf(y, x); + y = xisnanf(x) ? 
SLEEF_NANf : y; + + return y; +} + +EXPORT CONST float xexp2f(float d) { + int q = (int)rintfk(d); + float s, u; + + s = d - q; + + u = +0.1535920892e-3; + u = mlaf(u, s, +0.1339262701e-2); + u = mlaf(u, s, +0.9618384764e-2); + u = mlaf(u, s, +0.5550347269e-1); + u = mlaf(u, s, +0.2402264476e+0); + u = mlaf(u, s, +0.6931471825e+0); + u = dfnormalize_f2_f2(dfadd_f2_f_f2(1, dfmul_f2_f_f(u, s))).x; + + u = ldexp2kf(u, q); + + if (d >= 128) u = SLEEF_INFINITYf; + if (d < -150) u = 0; + + return u; +} + +EXPORT CONST float xexp2f_u35(float d) { + int q = (int)rintfk(d); + float s, u; + + s = d - q; + + u = +0.1535920892e-3; + u = mlaf(u, s, +0.1339262701e-2); + u = mlaf(u, s, +0.9618384764e-2); + u = mlaf(u, s, +0.5550347269e-1); + u = mlaf(u, s, +0.2402264476e+0); + u = mlaf(u, s, +0.6931471825e+0); + u = mlaf(u, s, +0.1000000000e+1); + + u = ldexp2kf(u, q); + + if (d >= 128) u = SLEEF_INFINITYf; + if (d < -150) u = 0; + + return u; +} + +EXPORT CONST float xexp10f(float d) { + int q = (int)rintfk(d * (float)LOG10_2); + float s, u; + + s = mlaf(q, -L10Uf, d); + s = mlaf(q, -L10Lf, s); + + u = +0.6802555919e-1; + u = mlaf(u, s, +0.2078080326e+0); + u = mlaf(u, s, +0.5393903852e+0); + u = mlaf(u, s, +0.1171245337e+1); + u = mlaf(u, s, +0.2034678698e+1); + u = mlaf(u, s, +0.2650949001e+1); + Sleef_float2 x = dfadd_f2_f2_f(df(2.3025851249694824219, -3.1705172516493593157e-08), u * s); + u = dfnormalize_f2_f2(dfadd_f2_f_f2(1, dfmul_f2_f2_f(x, s))).x; + + u = ldexp2kf(u, q); + + if (d > 38.5318394191036238941387f) u = SLEEF_INFINITYf; // log10(FLT_MAX) + if (d < -50) u = 0; + + return u; +} + +EXPORT CONST float xexp10f_u35(float d) { + int q = (int)rintfk(d * (float)LOG10_2); + float s, u; + + s = mlaf(q, -L10Uf, d); + s = mlaf(q, -L10Lf, s); + + u = +0.2064004987e+0; + u = mlaf(u, s, +0.5417877436e+0); + u = mlaf(u, s, +0.1171286821e+1); + u = mlaf(u, s, +0.2034656048e+1); + u = mlaf(u, s, +0.2650948763e+1); + u = mlaf(u, s, +0.2302585125e+1); + u = mlaf(u, s, +0.1000000000e+1); + + u = ldexp2kf(u, q); + + if (d > 38.5318394191036238941387f) u = SLEEF_INFINITYf; // log10(FLT_MAX) + if (d < -50) u = 0; + + return u; +} + +EXPORT CONST float xexpm1f(float a) { + Sleef_float2 d = dfadd2_f2_f2_f(expk2f(df(a, 0)), -1.0f); + float x = d.x + d.y; + if (a > 88.72283172607421875f) x = SLEEF_INFINITYf; + if (a < -16.635532333438687426013570f) x = -1; + if (xisnegzerof(a)) x = -0.0f; + return x; +} + +EXPORT CONST float xlog10f(float d) { + Sleef_float2 x, s; + float m, t, x2; + int e; + + int o = d < FLT_MIN; + if (o) d *= (float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32); + + e = ilogb2kf(d * (1.0f/0.75f)); + m = ldexp3kf(d, -e); + + if (o) e -= 64; + + x = dfdiv_f2_f2_f2(dfadd2_f2_f_f(-1, m), dfadd2_f2_f_f(1, m)); + x2 = x.x * x.x; + + t = +0.1314289868e+0; + t = mlaf(t, x2, +0.1735493541e+0); + t = mlaf(t, x2, +0.2895309627e+0); + + s = dfmul_f2_f2_f(df(0.30103001, -1.432098889e-08), (float)e); + s = dfadd_f2_f2_f2(s, dfmul_f2_f2_f2(x, df(0.868588984, -2.170757285e-08))); + s = dfadd_f2_f2_f(s, x2 * x.x * t); + + float r = s.x + s.y; + + if (xisinff(d)) r = SLEEF_INFINITYf; + if (d < 0 || xisnanf(d)) r = SLEEF_NANf; + if (d == 0) r = -SLEEF_INFINITYf; + + return r; +} + +EXPORT CONST float xlog2f(float d) { + Sleef_float2 x, s; + float m, t, x2; + int e; + + int o = d < FLT_MIN; + if (o) d *= (float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32); + + e = ilogb2kf(d * (1.0f/0.75f)); + m = ldexp3kf(d, -e); + + if (o) e -= 64; + + x = dfdiv_f2_f2_f2(dfadd2_f2_f_f(-1, m), dfadd2_f2_f_f(1, m)); + x2 = x.x 
* x.x; + + t = +0.4374550283e+0f; + t = mlaf(t, x2, +0.5764790177e+0f); + t = mlaf(t, x2, +0.9618012905120f); + + s = dfadd2_f2_f_f2(e, dfmul_f2_f2_f2(x, df(2.8853900432586669922, 3.2734474483568488616e-08))); + s = dfadd2_f2_f2_f(s, x2 * x.x * t); + + float r = s.x + s.y; + + if (xisinff(d)) r = SLEEF_INFINITYf; + if (d < 0 || xisnanf(d)) r = SLEEF_NANf; + if (d == 0) r = -SLEEF_INFINITYf; + + return r; +} + +EXPORT CONST float xlog2f_u35(float d) { + float m, t, x, x2; + int e; + + int o = d < FLT_MIN; + if (o) d *= (float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32); + + e = ilogb2kf(d * (1.0f/0.75f)); + m = ldexp3kf(d, -e); + + if (o) e -= 64; + + x = (m - 1) / (m + 1); + x2 = x * x; + + t = +0.4374088347e+0; + t = mlaf(t, x2, +0.5764843822e+0); + t = mlaf(t, x2, +0.9618024230e+0); + + float r = mlaf(x2 * x, t, mlaf(x, +0.2885390043e+1, e)); + + if (xisinff(d)) r = SLEEF_INFINITYf; + if (d < 0 || xisnanf(d)) r = SLEEF_NANf; + if (d == 0) r = -SLEEF_INFINITYf; + + return r; +} + +EXPORT CONST float xlog1pf(float d) { + Sleef_float2 x, s; + float m, t, x2; + int e; + + float dp1 = d + 1; + + int o = dp1 < FLT_MIN; + if (o) dp1 *= (float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32); + + e = ilogb2kf(dp1 * (1.0f/0.75f)); + + t = ldexp3kf(1, -e); + m = mlaf(d, t, t-1); + + if (o) e -= 64; + + x = dfdiv_f2_f2_f2(df(m, 0), dfadd_f2_f_f(2, m)); + x2 = x.x * x.x; + + t = +0.3027294874e+0f; + t = mlaf(t, x2, +0.3996108174e+0f); + t = mlaf(t, x2, +0.6666694880e+0f); + + s = dfmul_f2_f2_f(df(0.69314718246459960938f, -1.904654323148236017e-09f), (float)e); + s = dfadd_f2_f2_f2(s, dfscale_f2_f2_f(x, 2)); + s = dfadd_f2_f2_f(s, x2 * x.x * t); + + float r = s.x + s.y; + + if (d > 1e+38) r = SLEEF_INFINITYf; + if (d < -1) r = SLEEF_NANf; + if (d == -1) r = -SLEEF_INFINITYf; + if (xisnegzerof(d)) r = -0.0f; + + return r; +} + +EXPORT CONST float xcbrtf(float d) { + float x, y, q = 1.0f; + int e, r; + + e = ilogbkf(fabsfk(d))+1; + d = ldexp2kf(d, -e); + r = (e + 6144) % 3; + q = (r == 1) ? 1.2599210498948731647672106f : q; + q = (r == 2) ? 1.5874010519681994747517056f : q; + q = ldexp2kf(q, (e + 6144) / 3 - 2048); + + q = mulsignf(q, d); + d = fabsfk(d); + + x = -0.601564466953277587890625f; + x = mlaf(x, d, 2.8208892345428466796875f); + x = mlaf(x, d, -5.532182216644287109375f); + x = mlaf(x, d, 5.898262500762939453125f); + x = mlaf(x, d, -3.8095417022705078125f); + x = mlaf(x, d, 2.2241256237030029296875f); + + y = d * x * x; + y = (y - (2.0f / 3.0f) * y * (y * x - 1.0f)) * q; + + return y; +} + +EXPORT CONST float xcbrtf_u1(float d) { + float x, y, z; + Sleef_float2 q2 = df(1, 0), u, v; + int e, r; + + e = ilogbkf(fabsfk(d))+1; + d = ldexp2kf(d, -e); + r = (e + 6144) % 3; + q2 = (r == 1) ? df(1.2599210739135742188, -2.4018701694217270415e-08) : q2; + q2 = (r == 2) ? 
df(1.5874010324478149414, 1.9520385308169352356e-08) : q2; + + q2.x = mulsignf(q2.x, d); q2.y = mulsignf(q2.y, d); + d = fabsfk(d); + + x = -0.601564466953277587890625f; + x = mlaf(x, d, 2.8208892345428466796875f); + x = mlaf(x, d, -5.532182216644287109375f); + x = mlaf(x, d, 5.898262500762939453125f); + x = mlaf(x, d, -3.8095417022705078125f); + x = mlaf(x, d, 2.2241256237030029296875f); + + y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0f); + + z = x; + + u = dfmul_f2_f_f(x, x); + u = dfmul_f2_f2_f2(u, u); + u = dfmul_f2_f2_f(u, d); + u = dfadd2_f2_f2_f(u, -x); + y = u.x + u.y; + + y = -2.0 / 3.0 * y * z; + v = dfadd2_f2_f2_f(dfmul_f2_f_f(z, z), y); + v = dfmul_f2_f2_f(v, d); + v = dfmul_f2_f2_f2(v, q2); + z = ldexp2kf(v.x + v.y, (e + 6144) / 3 - 2048); + + if (xisinff(d)) { z = mulsignf(SLEEF_INFINITYf, q2.x); } + if (d == 0) { z = mulsignf(0, q2.x); } + + return z; +} + +// + +EXPORT CONST float xfabsf(float x) { return fabsfk(x); } + +EXPORT CONST float xcopysignf(float x, float y) { return copysignfk(x, y); } + +EXPORT CONST float xfmaxf(float x, float y) { + return y != y ? x : (x > y ? x : y); +} + +EXPORT CONST float xfminf(float x, float y) { + return y != y ? x : (x < y ? x : y); +} + +EXPORT CONST float xfdimf(float x, float y) { + float ret = x - y; + if (ret < 0 || x == y) ret = 0; + return ret; +} + +EXPORT CONST float xtruncf(float x) { + float fr = x - (int32_t)x; + return (xisinff(x) || fabsfk(x) >= (float)(INT64_C(1) << 23)) ? x : copysignfk(x - fr, x); +} + +EXPORT CONST float xfloorf(float x) { + float fr = x - (int32_t)x; + fr = fr < 0 ? fr+1.0f : fr; + return (xisinff(x) || fabsfk(x) >= (float)(INT64_C(1) << 23)) ? x : copysignfk(x - fr, x); +} + +EXPORT CONST float xceilf(float x) { + float fr = x - (int32_t)x; + fr = fr <= 0 ? fr : fr-1.0f; + return (xisinff(x) || fabsfk(x) >= (float)(INT64_C(1) << 23)) ? x : copysignfk(x - fr, x); +} + +EXPORT CONST float xroundf(float d) { + float x = d + 0.5f; + float fr = x - (int32_t)x; + if (fr == 0 && x <= 0) x--; + fr = fr < 0 ? fr+1.0f : fr; + x = d == 0.4999999701976776123f ? 0 : x; // nextafterf(0.5, 0) + return (xisinff(d) || fabsfk(d) >= (float)(INT64_C(1) << 23)) ? d : copysignfk(x - fr, d); +} + +EXPORT CONST float xrintf(float d) { + float x = d + 0.5f; + int32_t isodd = (1 & (int32_t)x) != 0; + float fr = x - (int32_t)x; + fr = (fr < 0 || (fr == 0 && isodd)) ? fr+1.0f : fr; + x = d == 0.50000005960464477539f ? 0 : x; // nextafterf(0.5, 1) + return (xisinff(d) || fabsfk(d) >= (float)(INT64_C(1) << 23)) ? d : copysignfk(x - fr, d); +} + +EXPORT CONST Sleef_float2 xmodff(float x) { + float fr = x - (int32_t)x; + fr = fabsfk(x) > (float)(INT64_C(1) << 23) ? 0 : fr; + Sleef_float2 ret = { copysignfk(fr, x), copysignfk(x - fr, x) }; + return ret; +} + +EXPORT CONST float xldexpf(float x, int exp) { + if (exp > 300) exp = 300; + if (exp < -300) exp = -300; + + int e0 = exp >> 2; + if (exp < 0) e0++; + if (-50 < exp && exp < 50) e0 = 0; + int e1 = exp - (e0 << 2); + + float p = pow2if(e0); + float ret = x * pow2if(e1) * p * p * p * p; + + return ret; +} + +EXPORT CONST float xnextafterf(float x, float y) { + union { + float f; + int32_t i; + } cx; + + cx.f = x == 0 ? 
mulsignf(0, y) : x; + int c = (cx.i < 0) == (y < x); + if (c) cx.i = -(cx.i ^ (1 << 31)); + + if (x != y) cx.i--; + + if (c) cx.i = -(cx.i ^ (1 << 31)); + + if (cx.f == 0 && x != 0) cx.f = mulsignf(0, x); + if (x == 0 && y == 0) cx.f = y; + if (xisnanf(x) || xisnanf(y)) cx.f = SLEEF_NANf; + + return cx.f; +} + +EXPORT CONST float xfrfrexpf(float x) { + union { + float f; + int32_t u; + } cx; + + if (fabsfk(x) < FLT_MIN) x *= (1 << 30); + + cx.f = x; + cx.u &= ~0x7f800000U; + cx.u |= 0x3f000000U; + + if (xisinff(x)) cx.f = mulsignf(SLEEF_INFINITYf, x); + if (x == 0) cx.f = x; + + return cx.f; +} + +EXPORT CONST int xexpfrexpf(float x) { + union { + float f; + uint32_t u; + } cx; + + int ret = 0; + + if (fabsfk(x) < FLT_MIN) { x *= (1 << 30); ret = -30; } + + cx.f = x; + ret += (int32_t)(((cx.u >> 23) & 0xff)) - 0x7e; + + if (x == 0 || xisnanf(x) || xisinff(x)) ret = 0; + + return ret; +} + +EXPORT CONST float xhypotf_u05(float x, float y) { + x = fabsfk(x); + y = fabsfk(y); + float min = fminfk(x, y), n = min; + float max = fmaxfk(x, y), d = max; + + if (max < FLT_MIN) { n *= UINT64_C(1) << 24; d *= UINT64_C(1) << 24; } + Sleef_float2 t = dfdiv_f2_f2_f2(df(n, 0), df(d, 0)); + t = dfmul_f2_f2_f(dfsqrt_f2_f2(dfadd2_f2_f2_f(dfsqu_f2_f2(t), 1)), max); + float ret = t.x + t.y; + if (xisnanf(ret)) ret = SLEEF_INFINITYf; + if (min == 0) ret = max; + if (xisnanf(x) || xisnanf(y)) ret = SLEEF_NANf; + if (x == SLEEF_INFINITYf || y == SLEEF_INFINITYf) ret = SLEEF_INFINITYf; + return ret; +} + +EXPORT CONST float xhypotf_u35(float x, float y) { + x = fabsfk(x); + y = fabsfk(y); + float min = fminfk(x, y); + float max = fmaxfk(x, y); + + float t = min / max; + float ret = max * SQRTF(1 + t*t); + if (min == 0) ret = max; + if (xisnanf(x) || xisnanf(y)) ret = SLEEF_NANf; + if (x == SLEEF_INFINITYf || y == SLEEF_INFINITYf) ret = SLEEF_INFINITYf; + return ret; +} + +static INLINE CONST float toward0f(float d) { + return d == 0 ? 0 : intBitsToFloat(floatToRawIntBits(d)-1); +} + +static INLINE CONST float ptruncf(float x) { + return fabsfk(x) >= (float)(INT64_C(1) << 23) ? x : (x - (x - (int32_t)x)); +} + +EXPORT CONST float xfmodf(float x, float y) { + float nu = fabsfk(x), de = fabsfk(y), s = 1, q; + if (de < FLT_MIN) { nu *= UINT64_C(1) << 25; de *= UINT64_C(1) << 25; s = 1.0f / (UINT64_C(1) << 25); } + Sleef_float2 r = df(nu, 0); + float rde = toward0f(1.0f / de); + + for(int i=0;i<8;i++) { // ceil(log2(FLT_MAX) / 22)+1 + q = ptruncf(toward0f(r.x) * rde); + q = (3*de > r.x && r.x >= de) ? 2 : q; + q = (2*de > r.x && r.x >= de) ? 1 : q; + r = dfnormalize_f2_f2(dfadd2_f2_f2_f2(r, dfmul_f2_f_f(q, -de))); + if (r.x < de) break; + } + + float ret = (r.x + r.y) * s; + if (r.x + r.y == de) ret = 0; + ret = mulsignf(ret, x); + if (nu < de) ret = x; + if (de == 0) ret = SLEEF_NANf; + + return ret; +} + +static INLINE CONST float rintfk2(float d) { + float x = d + 0.5f; + int32_t isodd = (1 & (int32_t)x) != 0; + float fr = x - (int32_t)x; + fr = (fr < 0 || (fr == 0 && isodd)) ? fr+1.0f : fr; + return (fabsfk(d) >= (float)(INT64_C(1) << 23)) ? d : copysignfk(x - fr, d); +} + +EXPORT CONST float xremainderf(float x, float y) { + float n = fabsfk(x), d = fabsfk(y), s = 1, q; + if (d < FLT_MIN*2) { n *= UINT64_C(1) << 25; d *= UINT64_C(1) << 25; s = 1.0f / (UINT64_C(1) << 25); } + float rd = 1.0f / d; + Sleef_float2 r = df(n, 0); + int qisodd = 0; + + for(int i=0;i<8;i++) { // ceil(log2(FLT_MAX) / 22)+1 + q = rintfk2(r.x * rd); + if (fabsfk(r.x) < 1.5f * d) q = r.x < 0 ? 
-1 : 1; + if (fabsfk(r.x) < 0.5f * d || (fabsfk(r.x) == 0.5f * d && !qisodd)) q = 0; + if (q == 0) break; + if (xisinff(q * -d)) q = q + mulsignf(-1, r.x); + qisodd ^= (1 & (int)q) != 0 && fabsfk(q) < (float)(INT64_C(1) << 24); + r = dfnormalize_f2_f2(dfadd2_f2_f2_f2(r, dfmul_f2_f_f(q, -d))); + } + + float ret = r.x * s; + ret = mulsignf(ret, x); + if (xisinff(y)) ret = xisinff(x) ? SLEEF_NANf : x; + if (d == 0) ret = SLEEF_NANf; + + return ret; +} + +EXPORT CONST float xsqrtf_u05(float d) { + float q = 0.5f; + + d = d < 0 ? SLEEF_NANf : d; + + if (d < 5.2939559203393770e-23f) { + d *= 1.8889465931478580e+22f; + q = 7.2759576141834260e-12f * 0.5f; + } + + if (d > 1.8446744073709552e+19f) { + d *= 5.4210108624275220e-20f; + q = 4294967296.0f * 0.5f; + } + + // http://en.wikipedia.org/wiki/Fast_inverse_square_root + float x = intBitsToFloat(0x5f375a86 - (floatToRawIntBits(d + 1e-45f) >> 1)); + + x = x * (1.5f - 0.5f * d * x * x); + x = x * (1.5f - 0.5f * d * x * x); + x = x * (1.5f - 0.5f * d * x * x) * d; + + Sleef_float2 d2 = dfmul_f2_f2_f2(dfadd2_f2_f_f2(d, dfmul_f2_f_f(x, x)), dfrec_f2_f(x)); + + float ret = (d2.x + d2.y) * q; + + ret = d == SLEEF_INFINITYf ? SLEEF_INFINITYf : ret; + ret = d == 0 ? d : ret; + + return ret; +} + +EXPORT CONST float xsqrtf_u35(float d) { + float q = 1.0f; + + d = d < 0 ? SLEEF_NANf : d; + + if (d < 5.2939559203393770e-23f) { + d *= 1.8889465931478580e+22f; + q = 7.2759576141834260e-12f; + } + + if (d > 1.8446744073709552e+19f) { + d *= 5.4210108624275220e-20f; + q = 4294967296.0f; + } + + // http://en.wikipedia.org/wiki/Fast_inverse_square_root + float x = intBitsToFloat(0x5f375a86 - (floatToRawIntBits(d + 1e-45) >> 1)); + + x = x * (1.5f - 0.5f * d * x * x); + x = x * (1.5f - 0.5f * d * x * x); + x = x * (1.5f - 0.5f * d * x * x); + x = x * (1.5f - 0.5f * d * x * x); + + return d == SLEEF_INFINITYf ? SLEEF_INFINITYf : (x * d * q); +} + +EXPORT CONST float xsqrtf(float d) { return SQRTF(d); } + +EXPORT CONST float xfmaf(float x, float y, float z) { + float h2 = x * y + z, q = 1; + if (fabsfk(h2) < 1e-38f) { + const float c0 = 1 << 25, c1 = c0 * c0, c2 = c1 * c1; + x *= c1; + y *= c1; + z *= c2; + q = 1.0f / c2; + } + if (fabsfk(h2) > 1e+38f) { + const float c0 = 1 << 25, c1 = c0 * c0, c2 = c1 * c1; + x *= 1.0 / c1; + y *= 1.0 / c1; + z *= 1.0 / c2; + q = c2; + } + Sleef_float2 d = dfmul_f2_f_f(x, y); + d = dfadd2_f2_f2_f(d, z); + float ret = (x == 0 || y == 0) ? z : (d.x + d.y); + if (xisinff(z) && !xisinff(x) && !xisnanf(x) && !xisinff(y) && !xisnanf(y)) h2 = z; + return (xisinff(h2) || xisnanf(h2)) ? h2 : ret*q; +} + +// + +static INLINE CONST Sleef_float2 sinpifk(float d) { + float u, s, t; + Sleef_float2 x, s2; + + u = d * 4; + int q = ceilfk(u) & ~1; + int o = (q & 2) != 0; + + s = u - (float)q; + t = s; + s = s * s; + s2 = dfmul_f2_f_f(t, t); + + // + + u = o ? -0.2430611801e-7f : +0.3093842054e-6f; + u = mlaf(u, s, o ? +0.3590577080e-5f : -0.3657307388e-4f); + u = mlaf(u, s, o ? -0.3259917721e-3f : +0.2490393585e-2f); + x = dfadd2_f2_f_f2(u * s, o ? df(0.015854343771934509277, 4.4940051354032242811e-10) : + df(-0.080745510756969451904, -1.3373665339076936258e-09)); + x = dfadd2_f2_f2_f2(dfmul_f2_f2_f2(s2, x), o ? df(-0.30842512845993041992, -9.0728339030733922277e-09) : + df(0.78539818525314331055, -2.1857338617566484855e-08)); + + x = dfmul_f2_f2_f2(x, o ? s2 : df(t, 0)); + x = o ? 
dfadd2_f2_f2_f(x, 1) : x; + + // + + if ((q & 4) != 0) { x.x = -x.x; x.y = -x.y; } + + return x; +} + +EXPORT CONST float xsinpif_u05(float d) { + Sleef_float2 x = sinpifk(d); + float r = x.x + x.y; + + if (xisnegzerof(d)) r = -0.0; + if (fabsfk(d) > TRIGRANGEMAX4f) r = 0; + if (xisinff(d)) r = SLEEF_NANf; + + return r; +} + +static INLINE CONST Sleef_float2 cospifk(float d) { + float u, s, t; + Sleef_float2 x, s2; + + u = d * 4; + int q = ceilfk(u) & ~1; + int o = (q & 2) == 0; + + s = u - (float)q; + t = s; + s = s * s; + s2 = dfmul_f2_f_f(t, t); + + // + + u = o ? -0.2430611801e-7f : +0.3093842054e-6f; + u = mlaf(u, s, o ? +0.3590577080e-5f : -0.3657307388e-4f); + u = mlaf(u, s, o ? -0.3259917721e-3f : +0.2490393585e-2f); + x = dfadd2_f2_f_f2(u * s, o ? df(0.015854343771934509277, 4.4940051354032242811e-10) : + df(-0.080745510756969451904, -1.3373665339076936258e-09)); + x = dfadd2_f2_f2_f2(dfmul_f2_f2_f2(s2, x), o ? df(-0.30842512845993041992, -9.0728339030733922277e-09) : + df(0.78539818525314331055, -2.1857338617566484855e-08)); + + x = dfmul_f2_f2_f2(x, o ? s2 : df(t, 0)); + x = o ? dfadd2_f2_f2_f(x, 1) : x; + + // + + if (((q+2) & 4) != 0) { x.x = -x.x; x.y = -x.y; } + + return x; +} + +EXPORT CONST float xcospif_u05(float d) { + Sleef_float2 x = cospifk(d); + float r = x.x + x.y; + + if (fabsfk(d) > TRIGRANGEMAX4f) r = 1; + if (xisinff(d)) r = SLEEF_NANf; + + return r; +} + +typedef struct { + Sleef_float2 a, b; +} df2; + +static CONST df2 gammafk(float a) { + Sleef_float2 clc = df(0, 0), clln = df(1, 0), clld = df(1, 0), v = df(1, 0), x, y, z; + float t, u; + + int otiny = fabsfk(a) < 1e-30f, oref = a < 0.5f; + + x = otiny ? df(0, 0) : (oref ? dfadd2_f2_f_f(1, -a) : df(a, 0)); + + int o0 = (0.5f <= x.x && x.x <= 1.2), o2 = 2.3 < x.x; + + y = dfnormalize_f2_f2(dfmul_f2_f2_f2(dfadd2_f2_f2_f(x, 1), x)); + y = dfnormalize_f2_f2(dfmul_f2_f2_f2(dfadd2_f2_f2_f(x, 2), y)); + + clln = (o2 && x.x <= 7) ? y : clln; + + x = (o2 && x.x <= 7) ? dfadd2_f2_f2_f(x, 3) : x; + t = o2 ? (1.0 / x.x) : dfnormalize_f2_f2(dfadd2_f2_f2_f(x, o0 ? -1 : -2)).x; + + u = o2 ? +0.000839498720672087279971000786 : (o0 ? +0.9435157776e+0f : +0.1102489550e-3f); + u = mlaf(u, t, o2 ? -5.17179090826059219329394422e-05 : (o0 ? +0.8670063615e+0f : +0.8160019934e-4f)); + u = mlaf(u, t, o2 ? -0.000592166437353693882857342347 : (o0 ? +0.4826702476e+0f : +0.1528468856e-3f)); + u = mlaf(u, t, o2 ? +6.97281375836585777403743539e-05 : (o0 ? -0.8855129778e-1f : -0.2355068718e-3f)); + u = mlaf(u, t, o2 ? +0.000784039221720066627493314301 : (o0 ? +0.1013825238e+0f : +0.4962242092e-3f)); + u = mlaf(u, t, o2 ? -0.000229472093621399176949318732 : (o0 ? -0.1493408978e+0f : -0.1193488017e-2f)); + u = mlaf(u, t, o2 ? -0.002681327160493827160473958490 : (o0 ? +0.1697509140e+0f : +0.2891599433e-2f)); + u = mlaf(u, t, o2 ? +0.003472222222222222222175164840 : (o0 ? -0.2072454542e+0f : -0.7385451812e-2f)); + u = mlaf(u, t, o2 ? +0.083333333333333333335592087900 : (o0 ? +0.2705872357e+0f : +0.2058077045e-1f)); + + y = dfmul_f2_f2_f2(dfadd2_f2_f2_f(x, -0.5), logk2f(x)); + y = dfadd2_f2_f2_f2(y, dfneg_f2_f2(x)); + y = dfadd2_f2_f2_f2(y, dfx(0.91893853320467278056)); // 0.5*log(2*M_PI) + + z = dfadd2_f2_f2_f(dfmul_f2_f_f (u, t), o0 ? -0.400686534596170958447352690395e+0f : -0.673523028297382446749257758235e-1f); + z = dfadd2_f2_f2_f(dfmul_f2_f2_f(z, t), o0 ? +0.822466960142643054450325495997e+0f : +0.322467033928981157743538726901e+0f); + z = dfadd2_f2_f2_f(dfmul_f2_f2_f(z, t), o0 ? 
-0.577215665946766039837398973297e+0f : +0.422784335087484338986941629852e+0f); + z = dfmul_f2_f2_f(z, t); + + clc = o2 ? y : z; + + clld = o2 ? dfadd2_f2_f2_f(dfmul_f2_f_f(u, t), 1) : clld; + + y = clln; + + clc = otiny ? dfx(41.58883083359671856503) : // log(2^60) + (oref ? dfadd2_f2_f2_f2(dfx(1.1447298858494001639), dfneg_f2_f2(clc)) : clc); // log(M_PI) + clln = otiny ? df(1, 0) : (oref ? clln : clld); + + if (oref) x = dfmul_f2_f2_f2(clld, sinpifk(a - (float)(INT64_C(1) << 12) * (int32_t)(a * (1.0 / (INT64_C(1) << 12))))); + + clld = otiny ? df(a*((INT64_C(1) << 30)*(float)(INT64_C(1) << 30)), 0) : (oref ? x : y); + + df2 ret = { clc, dfdiv_f2_f2_f2(clln, clld) }; + + return ret; +} + +EXPORT CONST float xtgammaf_u1(float a) { + df2 d = gammafk(a); + Sleef_float2 y = dfmul_f2_f2_f2(expk2f(d.a), d.b); + float r = y.x + y.y; + r = (a == -SLEEF_INFINITYf || (a < 0 && xisintf(a)) || (xisnumberf(a) && a < 0 && xisnanf(r))) ? SLEEF_NANf : r; + r = ((a == SLEEF_INFINITYf || xisnumberf(a)) && a >= -FLT_MIN && (a == 0 || a > 36 || xisnanf(r))) ? mulsignf(SLEEF_INFINITYf, a) : r; + return r; +} + +EXPORT CONST float xlgammaf_u1(float a) { + df2 d = gammafk(a); + Sleef_float2 y = dfadd2_f2_f2_f2(d.a, logk2f(dfabs_f2_f2(d.b))); + float r = y.x + y.y; + r = (xisinff(a) || (a <= 0 && xisintf(a)) || (xisnumberf(a) && xisnanf(r))) ? SLEEF_INFINITYf : r; + return r; +} + +EXPORT CONST float xerff_u1(float a) { + float s = a, t, u; + Sleef_float2 d; + + a = fabsfk(a); + int o0 = a < 1.1f, o1 = a < 2.4f, o2 = a < 4.0f; + u = o0 ? (a*a) : a; + + t = o0 ? +0.7089292194e-4f : o1 ? -0.1792667899e-4f : -0.9495757695e-5f; + t = mlaf(t, u, o0 ? -0.7768311189e-3f : o1 ? +0.3937633010e-3f : +0.2481465926e-3f); + t = mlaf(t, u, o0 ? +0.5159463733e-2f : o1 ? -0.3949181177e-2f : -0.2918176819e-2f); + t = mlaf(t, u, o0 ? -0.2683781274e-1f : o1 ? +0.2445474640e-1f : +0.2059706673e-1f); + t = mlaf(t, u, o0 ? +0.1128318012e+0f : o1 ? -0.1070996150e+0f : -0.9901899844e-1f); + d = dfmul_f2_f_f(t, u); + d = dfadd2_f2_f2_f2(d, o0 ? dfx(-0.376125876000657465175213237214e+0) : + o1 ? dfx(-0.634588905908410389971210809210e+0) : + dfx(-0.643598050547891613081201721633e+0)); + d = dfmul_f2_f2_f(d, u); + d = dfadd2_f2_f2_f2(d, o0 ? dfx(+0.112837916021059138255978217023e+1) : + o1 ? dfx(-0.112879855826694507209862753992e+1) : + dfx(-0.112461487742845562801052956293e+1)); + d = dfmul_f2_f2_f(d, a); + d = o0 ? d : dfadd_f2_f_f2(1.0, dfneg_f2_f2(expk2f(d))); + u = mulsignf(o2 ? (d.x + d.y) : 1, s); + u = xisnanf(a) ? SLEEF_NANf : u; + return u; +} + +EXPORT CONST float xerfcf_u15(float a) { + float s = a, r = 0, t; + Sleef_float2 u, d, x; + a = fabsfk(a); + int o0 = a < 1.0f, o1 = a < 2.2f, o2 = a < 4.3f, o3 = a < 10.1f; + u = o1 ? df(a, 0) : dfdiv_f2_f2_f2(df(1, 0), df(a, 0)); + + t = o0 ? -0.8638041618e-4f : o1 ? -0.6236977242e-5f : o2 ? -0.3869504035e+0f : +0.1115344167e+1f; + t = mlaf(t, u.x, o0 ? +0.6000166177e-3f : o1 ? +0.5749821503e-4f : o2 ? +0.1288077235e+1f : -0.9454904199e+0f); + t = mlaf(t, u.x, o0 ? -0.1665703603e-2f : o1 ? +0.6002851478e-5f : o2 ? -0.1816803217e+1f : -0.3667259514e+0f); + t = mlaf(t, u.x, o0 ? +0.1795156277e-3f : o1 ? -0.2851036377e-2f : o2 ? +0.1249150872e+1f : +0.7155663371e+0f); + t = mlaf(t, u.x, o0 ? +0.1914106123e-1f : o1 ? +0.2260518074e-1f : o2 ? -0.1328857988e+0f : -0.1262947265e-1f); + + d = dfmul_f2_f2_f(u, t); + d = dfadd2_f2_f2_f2(d, o0 ? dfx(-0.102775359343930288081655368891e+0) : + o1 ? dfx(-0.105247583459338632253369014063e+0) : + o2 ? 
dfx(-0.482365310333045318680618892669e+0) : + dfx(-0.498961546254537647970305302739e+0)); + d = dfmul_f2_f2_f2(d, u); + d = dfadd2_f2_f2_f2(d, o0 ? dfx(-0.636619483208481931303752546439e+0) : + o1 ? dfx(-0.635609463574589034216723775292e+0) : + o2 ? dfx(-0.134450203224533979217859332703e-2) : + dfx(-0.471199543422848492080722832666e-4)); + d = dfmul_f2_f2_f2(d, u); + d = dfadd2_f2_f2_f2(d, o0 ? dfx(-0.112837917790537404939545770596e+1) : + o1 ? dfx(-0.112855987376668622084547028949e+1) : + o2 ? dfx(-0.572319781150472949561786101080e+0) : + dfx(-0.572364030327966044425932623525e+0)); + + x = dfmul_f2_f2_f(o1 ? d : df(-a, 0), a); + x = o1 ? x : dfadd2_f2_f2_f2(x, d); + + x = expk2f(x); + x = o1 ? x : dfmul_f2_f2_f2(x, u); + + r = o3 ? (x.x + x.y) : 0; + if (s < 0) r = 2 - r; + r = xisnanf(s) ? SLEEF_NANf : r; + return r; +} + +// + +#ifdef ENABLE_MAIN +// gcc -w -DENABLE_MAIN -I../common sleefsp.c rempitab.c -lm +#include <stdlib.h> +int main(int argc, char **argv) { + float d1 = atof(argv[1]); + //float d2 = atof(argv[2]); + //float d3 = atof(argv[3]); + //printf("%.20g, %.20g\n", (double)d1, (double)d2); + //float i2 = atoi(argv[2]); + //float c = xatan2f_u1(d1, d2); + //printf("round %.20g\n", (double)d1); + printf("test = %.20g\n", (double)xsqrtf_u05(d1)); + //printf("correct = %.20g\n", (double)roundf(d1)); + //printf("rint %.20g\n", (double)d1); + //printf("test = %.20g\n", (double)xrintf(d1)); + //printf("correct = %.20g\n", (double)rintf(d1)); + //Sleef_float2 r = xsincospif_u35(d); + //printf("%g, %g\n", (double)r.x, (double)r.y); +} +#endif
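For reference, the float routines in sleefsp.c above thread extra precision through Sleef_float2 pairs: a value is carried as an unevaluated sum hi + lo, so intermediate results keep roughly twice the float precision. A minimal sketch of the two error-free transformations this style of code rests on, assuming an fmaf-capable libm; SLEEF's actual dfadd2_*/dfmul_* helpers differ in detail:

/* Illustration only, not part of the patch. */
#include <math.h>

typedef struct { float x, y; } float2_sketch; /* x: high part, y: low part */

/* Knuth two-sum: s.x + s.y == a + b exactly, with s.x = fl(a + b). */
static float2_sketch twosum_sketch(float a, float b) {
  float2_sketch s;
  float bb;
  s.x = a + b;
  bb = s.x - a;
  s.y = (a - (s.x - bb)) + (b - bb);
  return s;
}

/* Two-product via FMA: p.x + p.y == a * b exactly, with p.x = fl(a * b). */
static float2_sketch twoprod_sketch(float a, float b) {
  float2_sketch p;
  p.x = a * b;
  p.y = fmaf(a, b, -p.x);
  return p;
}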
diff --git a/src/ufp.cpp b/src/ufp.cpp new file mode 100644 index 00000000..a5edd36a --- /dev/null +++ b/src/ufp.cpp @@ -0,0 +1,81 @@ +/* + +Copyright (c) 2021 Agenium Scale + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +*/ + +#include <nsimd/nsimd.h> + +// ---------------------------------------------------------------------------- +// Actual implementation + +namespace nsimd { + +template <int ExponentSize, int MantissaSize, typename UnsignedType, typename T> +int ufp(T a_, T b_) { + UnsignedType a = nsimd::scalar_reinterpret(UnsignedType(), a_); + UnsignedType b = nsimd::scalar_reinterpret(UnsignedType(), b_); + UnsignedType exp_mask = ((UnsignedType)1 << ExponentSize) - 1; + i64 ea = (i64)((a >> MantissaSize) & exp_mask); + i64 eb = (i64)((b >> MantissaSize) & exp_mask); + if (ea - eb > 1 || ea - eb < -1) { + return 0; + } + UnsignedType man_mask = ((UnsignedType)1 << MantissaSize) - 1; + i64 ma = (i64)(a & man_mask) | ((i64)1 << MantissaSize); + i64 mb = (i64)(b & man_mask) | ((i64)1 << MantissaSize); + i64 d = 0; + + if (ea == eb) { + d = ma - mb; + } else if (ea > eb) { + d = 2 * ma - mb; + } else { + d = 2 * mb - ma; + } + d = (d >= 0 ? d : -d); + int i = 0; + for (; i <= MantissaSize + 1 && d >= ((i64)1 << i); i++) + ; + return (int)(MantissaSize + 1 - i); +} + +} // namespace nsimd + +// ---------------------------------------------------------------------------- +// C ABI + +extern "C" { + +NSIMD_DLLSPEC int nsimd_ufp_f16(f16 a, f16 b) { + return nsimd::ufp<5, 10, u16>(a, b); +} + +NSIMD_DLLSPEC int nsimd_ufp_f32(f32 a, f32 b) { + return nsimd::ufp<8, 23, u32>(a, b); +} + +NSIMD_DLLSPEC int nsimd_ufp_f64(f64 a, f64 b) { + return nsimd::ufp<11, 52, u64>(a, b); +} + +} // extern "C"
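For reference, the metric above counts how many leading mantissa bits (including the implicit bit) two finite values share, so an ufp of p means a relative difference of about 2^(1 - p). A few values that follow directly from the implementation, sketched with the same scalar_reinterpret helper it already uses:

// Illustration only, not part of the patch.
#include <nsimd/nsimd.h>
#include <cassert>

void ufp_examples() {
  f32 one = 1.0f;
  // The f32 next to 1.0f, built by bumping the bit pattern by one ulp.
  f32 next = nsimd::scalar_reinterpret(
      f32(), (u32)(nsimd::scalar_reinterpret(u32(), one) + 1));
  assert(nsimd_ufp_f32(one, one) == 24);  // identical: 23 bits + implicit bit
  assert(nsimd_ufp_f32(one, next) == 23); // one ulp apart: one bit lost
  assert(nsimd_ufp_f32(1.0f, 4.0f) == 0); // exponents differ by 2: no match
}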
diff --git a/tests/assign_arith.cpp b/tests/assign_arith.cpp new file mode 100644 index 00000000..9c80cb07 --- /dev/null +++ b/tests/assign_arith.cpp @@ -0,0 +1,133 @@ +/* + +Copyright (c) 2021 Agenium Scale + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +*/ + +#include <nsimd/nsimd-all.hpp> +#include <vector> + +/* ------------------------------------------------------------------------- */ +/* Random number */ + +template <typename T> T get_rand() { + return (T)((rand() % 10) + 1); +} + +template <> f16 get_rand<f16>() { + return nsimd_f32_to_f16(get_rand<f32>()); +} + +/* ------------------------------------------------------------------------- */ +/* Arithmetic operators */ + +#define HELPER(op1, op2, name) \ + template <typename T> int test_##name##_T(size_t n) { \ + std::vector<T> a(n), b(n); \ + for (size_t i = 0; i < n; i++) { \ + a[i] = get_rand<T>(); \ + b[i] = get_rand<T>(); \ + } \ + \ + using namespace nsimd; \ + typedef pack<T> pack; \ + for (size_t i = 0; i < n; i += size_t(len(pack()))) { \ + pack tmp1 = loadu<pack>(&a[i]); \ + tmp1 op1 loadu<pack>(&b[i]); \ + pack tmp2 = loadu<pack>(&a[i]) op2 loadu<pack>(&b[i]); \ + if (any(tmp1 != tmp2)) { \ + return -1; \ + } \ + } \ + return 0; \ + } \ + \ + int test_##name(size_t n) { \ + return test_##name##_T<i8>(n) || test_##name##_T<u8>(n) || \ + test_##name##_T<i16>(n) || test_##name##_T<u16>(n) || \ + test_##name##_T<i32>(n) || test_##name##_T<u32>(n) || \ + test_##name##_T<i64>(n) || test_##name##_T<u64>(n) || \ + test_##name##_T<f16>(n) || test_##name##_T<f32>(n) || \ + test_##name##_T<f64>(n); \ + } \ + \ + int test_##name##_int_only(size_t n) { \ + return test_##name##_T<i8>(n) || test_##name##_T<u8>(n) || \ + test_##name##_T<i16>(n) || test_##name##_T<u16>(n) || \ + test_##name##_T<i32>(n) || test_##name##_T<u32>(n) || \ + test_##name##_T<i64>(n) || test_##name##_T<u64>(n); \ + } + +HELPER(+=, +, add) +HELPER(-=, -, sub) +HELPER(*=, *, mul) +HELPER(/=, /, div) +HELPER(|=, |, orb) +HELPER(&=, &, andb) +HELPER(^=, ^, xorb) + +#undef HELPER + +/* ------------------------------------------------------------------------- */ +/* Shift operators */ + +#define HELPER(op1, op2, name) \ + template <typename T> int test_##name##_T(size_t n) { \ + std::vector<T> a(n); \ + for (size_t i = 0; i < n; i++) { \ + a[i] = get_rand<T>(); \ + } \ + \ + using namespace nsimd; \ + typedef pack<T> pack; \ + for (int s = 0; s <= 3; s++) { \ + for (size_t i = 0; i < n; i += size_t(len(pack()))) { \ + pack tmp = loadu<pack>(&a[i]); \ + tmp op1 s; \ + if (any(tmp != (loadu<pack>(&a[i]) op2 s))) { \ + return -1; \ + } \ + } \ + } \ + return 0; \ + } \ + \ + int test_##name(size_t n) { \ + return test_##name##_T<i8>(n) || test_##name##_T<u8>(n) || \ + test_##name##_T<i16>(n) || test_##name##_T<u16>(n) || \ + test_##name##_T<i32>(n) || test_##name##_T<u32>(n) || \ + test_##name##_T<i64>(n) || test_##name##_T<u64>(n); \ + } + +HELPER(<<=, <<, shl) +HELPER(>>=, >>, shr) + +#undef HELPER + +/* ------------------------------------------------------------------------- */ + +int main() { + const size_t n = 2048; + return test_add(n) || test_sub(n) || test_mul(n) || test_div(n) || + test_orb_int_only(n) || test_andb_int_only(n) || + test_xorb_int_only(n) || test_shl(n) || test_shr(n); +} +
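For reference, one HELPER expansion boils down to the following check, sketched here for op1 '+=' and T = f32 with hypothetical data; the loadu/any calls follow nsimd's advanced C++ API exactly as the macro uses them:

// Illustration only, not part of the patch.
#include <nsimd/nsimd-all.hpp>
#include <vector>

int test_add_f32_sketch(size_t n) {
  std::vector<f32> a(n, 1.0f), b(n, 2.0f);
  typedef nsimd::pack<f32> pack;
  size_t step = size_t(nsimd::len(pack()));
  for (size_t i = 0; i + step <= n; i += step) {
    pack tmp1 = nsimd::loadu<pack>(&a[i]);
    tmp1 += nsimd::loadu<pack>(&b[i]); // compound assignment under test
    pack tmp2 = nsimd::loadu<pack>(&a[i]) + nsimd::loadu<pack>(&b[i]);
    if (nsimd::any(tmp1 != tmp2)) { // both forms must agree lane-wise
      return -1;
    }
  }
  return 0;
}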
diff --git a/tests/c11_vec.c b/tests/c11_vec.c new file mode 100644 index 00000000..b137971c --- /dev/null +++ b/tests/c11_vec.c @@ -0,0 +1,38 @@ +/* + +Copyright (c) 2021 Agenium Scale + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +*/ + +#include <nsimd/nsimd.h> + +int main() { +#if NSIMD_C >= 2011 + float in[NSIMD_MAX_LEN(f32)]; + int out[NSIMD_MAX_LEN(i32)]; + + nsimd_pack(f32) vin = nsimd_load(unaligned, nsimd_pack(f32), in); + nsimd_pack(i32) vout = nsimd_reinterpret(nsimd_pack(i32), vin); + nsimd_store(unaligned, out, vout); +#endif + + return 0; +} diff --git a/tests/cxx_adv_api_aliases.cpp b/tests/cxx_adv_api_aliases.cpp new file mode 100644 index 00000000..1114ef4e --- /dev/null +++ b/tests/cxx_adv_api_aliases.cpp @@ -0,0 +1,77 @@ +/* + +Copyright (c) 2021 Agenium Scale + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +*/ + +#include <nsimd/nsimd-all.hpp> + +/* ------------------------------------------------------------------------- */ +/* Random number */ + +template <typename T> T get_rand() { + return (T)((rand() % 100) - 50); +} + +template <> f16 get_rand<f16>() { + return nsimd_f32_to_f16(get_rand<f32>()); +} + +/* ------------------------------------------------------------------------- */ + +template <typename T> int test_aliases(size_t n) { + std::vector<T> a(n), b(n); + + for (size_t i = 0; i < n; i++) { + a[i] = get_rand<T>(); + b[i] = get_rand<T>(); + } + + using namespace nsimd; + typedef pack<T> pack; + size_t step = size_t(len(pack())); + for (size_t i = 0; i + step <= n; i += step) { + pack tmp1 = loadu<pack>(&a[i]); + pack tmp2 = loadu<pack>(&b[i]); + if (any(fabs(tmp1) != abs(tmp1))) { + return -1; + } + if (any(fmin(tmp1, tmp2) != min(tmp1, tmp2))) { + return -1; + } + if (any(fmax(tmp1, tmp2) != max(tmp1, tmp2))) { + return -1; + } + } + + return 0; +} + +/* ------------------------------------------------------------------------- */ + +int main() { + return test_aliases<i8>(2048) || test_aliases<u8>(2048) || + test_aliases<i16>(2048) || test_aliases<u16>(2048) || + test_aliases<i32>(2048) || test_aliases<u32>(2048) || + test_aliases<i64>(2048) || test_aliases<u64>(2048) || + test_aliases<f16>(2048) || test_aliases<f32>(2048) || + test_aliases<f64>(2048); +} diff --git a/tests/fp16.c b/tests/fp16.prec11.c similarity index 87% rename from tests/fp16.c rename to tests/fp16.prec11.c index e81b6b6f..30ea6a51 100644 --- a/tests/fp16.c +++ b/tests/fp16.prec11.c @@ -31,21 +31,6 @@ SOFTWARE.
/* ------------------------------------------------------------------------- */ -#ifndef NSIMD_NO_IEEE754 - -int is_nan(float a) { - union { - u32 u; - f32 f; - } buf; - buf.f = a; - return ((buf.u & 0x7FFFFF) != 0u) && ((buf.u & 0x7F800000) == 0x7F800000); -} - -#endif - -/* ------------------------------------------------------------------------- */ - float via_fp16(float a) { return nsimd_f16_to_f32(nsimd_f32_to_f16(a)); } /* ------------------------------------------------------------------------- */ @@ -56,26 +41,12 @@ float mk_fp32(int mantissa, int exponent) { /* ------------------------------------------------------------------------- */ -#ifndef NSIMD_NO_IEEE754 - -float mk_fp32_bin(u32 a) { - union { - u32 u; - f32 f; - } buf; - buf.u = a; - return buf.f; -} - -#endif - -/* ------------------------------------------------------------------------- */ - int test_f16_to_f32(u16 val, u32 expected) { f32 fexpected = nsimd_scalar_reinterpret_f32_u32(expected); f32 res = nsimd_u16_to_f32(val); u32 ures = nsimd_scalar_reinterpret_u32_f32(res); - if (ures != expected) { + if ((nsimd_isnan_f32(fexpected) && !nsimd_isnan_f32(res)) || + (!nsimd_isnan_f32(fexpected) && ures != expected)) { fprintf(stdout, "Error, nsimd_f16_to_f32: expected %e(0x%x) but got %e(0x%x) \n", (f64)fexpected, expected, (f64)res, ures); @@ -92,7 +63,7 @@ int test_f32_to_f16(u32 val, u16 expected) { f16 fres = nsimd_f32_to_f16(nsimd_scalar_reinterpret_f32_u32(val)); u16 ures = nsimd_scalar_reinterpret_u16_f16(fres); if (ures != expected) { - fprintf(stdout, "Error, nsimd_f16_to_f32: expected 0x%x but got 0x%x \n", + fprintf(stdout, "Error, nsimd_f32_to_f16: expected 0x%x but got 0x%x \n", expected, ures); fflush(stdout); return 1; @@ -105,9 +76,9 @@ int test_f32_to_f16(u32 val, u16 expected) { int main(void) { #ifndef NSIMD_NO_IEEE754 - const float infty = mk_fp32_bin(0x7F800000); - const float m_infty = mk_fp32_bin(0xFF800000); - const float nan = mk_fp32_bin(0x7FC00000); + const float infty = nsimd_scalar_reinterpret_f32_u32(0x7F800000); + const float m_infty = nsimd_scalar_reinterpret_f32_u32(0xFF800000); + const float nan = nsimd_scalar_reinterpret_f32_u32(0x7FC00000); #endif int i; @@ -198,7 +169,7 @@ int main(void) { fflush(stdout); return EXIT_FAILURE; } - if (!is_nan(via_fp16(nan))) { + if (!nsimd_isnan_f32(via_fp16(nan))) { fprintf(stdout, "... 
Error, %i \n", __LINE__); fflush(stdout); return EXIT_FAILURE; diff --git a/tests/memory.c b/tests/memory.prec11.c similarity index 100% rename from tests/memory.c rename to tests/memory.prec11.c diff --git a/tests/modules/common.hpp b/tests/modules/common.hpp index 4f1608ff..cb152096 100644 --- a/tests/modules/common.hpp +++ b/tests/modules/common.hpp @@ -1,6 +1,6 @@ /* -Copyright (c) 2019 Agenium Scale +Copyright (c) 2021 Agenium Scale Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -51,6 +51,25 @@ __device__ bool cmp_Ts(double a, double b) { return __double_as_longlong(a) == __double_as_longlong(b); } +#elif defined(NSIMD_ONEAPI) + +template <typename T> bool cmp_Ts(const T a, const T b) { return a == b; } + +bool cmp_Ts(sycl::half a, const sycl::half b) { + return nsimd::gpu_reinterpret(u16(), a) == + nsimd::gpu_reinterpret(u16(), b); +} + +bool cmp_Ts(sycl::cl_float a, sycl::cl_float b) { + return nsimd::gpu_reinterpret(u32(), a) == + nsimd::gpu_reinterpret(u32(), b); +} + +bool cmp_Ts(sycl::cl_double a, sycl::cl_double b) { + return nsimd::gpu_reinterpret(u64(), a) == + nsimd::gpu_reinterpret(u64(), b); +} + #endif // ---------------------------------------------------------------------------- @@ -121,7 +140,7 @@ template <typename T> bool cmp(T *src1, T *src2, unsigned int n) { return bool(host_ret); } -template <typename T> bool cmp(T *src1, T *src2, unsigned int n, double) { +template <typename T> bool cmp(T *src1, T *src2, unsigned int n, int) { return cmp(src1, src2, n); } @@ -195,63 +214,123 @@ template <typename T> bool cmp(T *src1, T *src2, size_t n) { return bool(host_ret); } -template <typename T> bool cmp(T *src1, T *src2, size_t n, double) { +template <typename T> bool cmp(T *src1, T *src2, size_t n, int) { return cmp(src1, src2, n); } template <typename T> void del(T *ptr) { hipFree(ptr); } -#else +#elif defined(NSIMD_ONEAPI) // ---------------------------------------------------------------------------- -// SIMD +// oneAPI -template <typename T> bool cmp(T *src1, T *src2, unsigned int n) { - return memcmp(src1, src2, n * sizeof(T)) == 0; +// perform reduction on blocks first, note that this could be optimized +// but to check correctness we don't need it now +template <typename T> +void device_cmp_blocks(T *const src1, const T *const src2, const size_t n, + sycl::accessor<T, 1, sycl::access::mode::read_write, + sycl::access::target::local> + local_buffer, + sycl::nd_item<1> item) { + size_t tid = item.get_local_id().get(0); + size_t i = item.get_global_id().get(0); + + if (i < n) { + local_buffer[tid] = T(cmp_Ts(src1[i], src2[i]) ?
1 : 0); + } + + item.barrier(sycl::access::fence_space::local_space); + + // other approach: see book p 345 + if (tid == 0) { + sycl::ONEAPI::sub_group sg = item.get_sub_group(); + src1[i] = sycl::ONEAPI::reduce(sg, local_buffer[0], + sycl::ONEAPI::multiplies<T>()); + } } -inline double to_double(f64 a) { return a; } -inline double to_double(f32 a) { return (double)a; } -inline double to_double(f16 a) { return (double)nsimd_f16_to_f32(a); } +template <typename T> +void device_cmp_array(int *const dst, const T *const src1, const size_t n, + sycl::nd_item<1> item) { + // reduction mul on the whole vector + T buf = T(1); + sycl::nd_range<1> nd_range = item.get_nd_range(); + sycl::range<1> range = nd_range.get_local_range(); + for (size_t i = 0; i < n; i += range.size()) { + buf = nsimd::gpu_mul(buf, src1[i]); + } + size_t i = item.get_global_id().get(0); + if (i == 0) { + dst[0] = int(buf); + } +} template <typename T> -bool cmp(T *src1, T *src2, unsigned int n, double epsilon) { - for (unsigned int i = 0; i < n; i++) { - double a = to_double(src1[i]); - double b = to_double(src2[i]); - double ma, mi; +bool cmp(T *const src1, const T *const src2, unsigned int n) { + + const size_t total_num_threads = (size_t)nsimd_kernel_param(n, 128); + sycl::queue q = nsimd::oneapi::default_queue(); + + sycl::event e1 = q.submit([=](sycl::handler &h) { + sycl::accessor<T, 1, sycl::access::mode::read_write, + sycl::access::target::local> + local_buffer(128, h); + + h.parallel_for(sycl::nd_range<1>(sycl::range<1>(total_num_threads), + sycl::range<1>(128)), + [=](sycl::nd_item<1> item_) { + device_cmp_blocks(src1, src2, size_t(n), local_buffer, + item_); + }); + }); + e1.wait_and_throw(); + + int *device_ret = nsimd::device_calloc<int>(n); + if (device_ret == NULL) { + std::cerr << "ERROR: cannot sycl::malloc_device " << sizeof(int) + << " bytes\n"; + exit(EXIT_FAILURE); + } + sycl::event e2 = + q.parallel_for(sycl::nd_range<1>(sycl::range<1>(total_num_threads), + sycl::range<1>(128)), + [=](sycl::nd_item<1> item_) { + device_cmp_array(device_ret, src1, size_t(n), item_); + }); + e2.wait_and_throw(); - if (std::isnan(a) && std::isnan(b)) { - continue; - } + int host_ret; + q.memcpy((void *)&host_ret, (void *)device_ret, sizeof(int)).wait(); + nsimd::device_free(device_ret); - if (std::isnan(a) || std::isnan(b)) { - return false; - } + return bool(host_ret); +} - if (std::isinf(a) && std::isinf(b) && - ((a > 0 && b > 0) || (a < 0 && b < 0))) { - continue; - } +template <typename T> bool cmp(T *src1, T *src2, unsigned int n, double) { + return cmp(src1, src2, n); +} - if (std::isinf(a) || std::isinf(b)) { - return false; - } +template <typename T> void del(T *ptr) { + sycl::queue q = nsimd::oneapi::default_queue(); + sycl::free(ptr, q); +} - a = (a > 0.0 ? a : -a); - b = (b > 0.0 ? b : -b); - ma = (a > b ? a : b); - mi = (a < b ?
a : b); + - if (ma == 0.0) { - continue; - } + - if ( (ma - mi) / ma > epsilon) { +#else - return false; - } - } +// ---------------------------------------------------------------------------- - return true; -} +// SIMD +template <typename T> bool cmp(T *src1, T *src2, unsigned int n) { + return memcmp(src1, src2, n * sizeof(T)) == 0; +} + +template <typename T> bool cmp(T *src1, T *src2, unsigned int n, int ufp) { + for (unsigned int i = 0; i < n; i++) { + if (nsimd::ufp(src1[i], src2[i]) < ufp) { return false; } } - return true; } diff --git a/tests/nsimd.c b/tests/nsimd.prec11.c similarity index 100% rename from tests/nsimd.c rename to tests/nsimd.prec11.c
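For reference, a hypothetical call site for the overload added above: asking for p matching mantissa bits is roughly the same as asking for a relative error below 2^(1 - p), so an integer bit count replaces the old epsilon; p = 21 for f32 tolerates about one part in a million. This sketch assumes tests/modules/common.hpp is included and the arrays are filled by the test:

// Illustration only, not part of the patch.
void cmp_usage_sketch(f32 *ref, f32 *computed, unsigned int n) {
  bool exact = cmp(ref, computed, n);     // bitwise equality
  bool close = cmp(ref, computed, n, 21); // >= 21 matching mantissa bits
  (void)exact;
  (void)close;
}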
diff --git a/tests/ufp.cpp b/tests/ufp.cpp new file mode 100644 index 00000000..820b74d5 --- /dev/null +++ b/tests/ufp.cpp @@ -0,0 +1,108 @@ +/* + +Copyright (c) 2019 Agenium Scale + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +*/ + +#include <nsimd/nsimd.h> +#include <climits> + +// ---------------------------------------------------------------------------- + +template <typename U> U randbits() { + U ret = 0; + U mask = ((U)1 << CHAR_BIT) - 1; + for (int i = 0; i < (int)sizeof(U); i++) { + ret = (U)(ret | (U)((((U)rand()) & mask) << (CHAR_BIT * i))); + } + return ret; +} + +// ---------------------------------------------------------------------------- + +template <typename U> int log_std_ulp(U a, U b) { + U d = (U)(a < b ? b - a : a - b); + int i = 0; + for (; i < 63 && d >= (U)1 << i; i++) + ; + return i; +} + +// ---------------------------------------------------------------------------- + +template <typename T> struct mantissa {}; +template <> struct mantissa<f64> { static const int size = 53; }; +template <> struct mantissa<f32> { static const int size = 24; }; +template <> struct mantissa<f16> { static const int size = 11; }; + +// ---------------------------------------------------------------------------- + +template <typename T, typename U> +int test_ufp(int n) { + T a = nsimd::scalar_cvt(T(), (U)1); + U ua = nsimd::scalar_reinterpret(U(), a); + T ap1 = nsimd::scalar_reinterpret(T(), (U)(ua + 1)); + if (nsimd::ufp(a, ap1) != mantissa<T>::size - 1) { + return -1; + } + + T am1 = nsimd::scalar_reinterpret(T(), (U)(ua - 1)); + if (nsimd::ufp(a, am1) != mantissa<T>::size - 1) { + return -1; + } + + if (nsimd::ufp(a, a) != mantissa<T>::size) { + return -1; + } + if (nsimd::ufp(a, a) != mantissa<T>::size) { + return -1; + } + if (nsimd::ufp(a, a) != mantissa<T>::size) { + return -1; + } + + T ax4 = nsimd::scalar_cvt(T(), (U)4); + if (nsimd::ufp(a, ax4) != 0) { + return -1; + } + + U mask = (U)1 << (mantissa<T>::size - 1); + U exponent = (U)((~mask) & ua); + for (int i = 0; i < n; i++) { + U ub = exponent | (randbits<U>() & mask); + T b = nsimd::scalar_reinterpret(T(), ub); + U uc = exponent | (randbits<U>() & mask); + T c = nsimd::scalar_reinterpret(T(), uc); + if (nsimd::ufp(b, c) != mantissa<T>::size - log_std_ulp(ub, uc)) { + return -1; + } + } + + return 0; +} + +// ---------------------------------------------------------------------------- + +int main(void) { + int n = 10000; + return test_ufp<f64, u64>(n) || test_ufp<f32, u32>(n) || + test_ufp<f16, u16>(n); +}
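For reference, a worked instance of the oracle in the loop above, following the definitions of ufp and log_std_ulp exactly; only the sample bit patterns are made up:

// Illustration only, not part of the patch.
#include <nsimd/nsimd.h>
#include <cassert>

int ufp_oracle_example() {
  u32 ua = 0x3f800000u; // bit pattern of 1.0f
  u32 ub = ua + 8u;     // 8 ulps away, same exponent
  f32 a = nsimd::scalar_reinterpret(f32(), ua);
  f32 b = nsimd::scalar_reinterpret(f32(), ub);
  // log_std_ulp stops at i = 4 since 8 >= 2^3 but 8 < 2^4, so the
  // oracle expects 24 - 4 = 20 matching bits, and ufp agrees:
  assert(nsimd::ufp(a, b) == 20);
  return 0;
}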