From cfa09d859fb85efcf6765367fa97cad8d6bd3f1c Mon Sep 17 00:00:00 2001 From: Henry de Valence Date: Tue, 4 Jun 2019 14:29:33 -0700 Subject: [PATCH] Use upstream IFMA intrinsics now that they exist. --- build.rs | 9 +-------- src/backend/vector/ifma/field.rs | 24 +++++++++++++----------- src/lib.rs | 8 +------- 3 files changed, 15 insertions(+), 26 deletions(-) diff --git a/build.rs b/build.rs index 2d020e1f2..0ffa5fc78 100644 --- a/build.rs +++ b/build.rs @@ -1,13 +1,6 @@ -#![cfg_attr( - all(feature = "simd_backend", target_feature = "avx512ifma"), - feature(simd_ffi) -)] -#![cfg_attr( - all(feature = "simd_backend", target_feature = "avx512ifma"), - feature(link_llvm_intrinsics) -)] #![cfg_attr(all(feature = "alloc", not(feature = "std")), feature(alloc))] #![cfg_attr(feature = "nightly", feature(doc_cfg))] +#![cfg_attr(feature = "simd_backend", feature(stdsimd))] #![allow(unused_variables)] #![allow(non_snake_case)] #![allow(dead_code)] diff --git a/src/backend/vector/ifma/field.rs b/src/backend/vector/ifma/field.rs index d220a5a4d..80bd86f1f 100644 --- a/src/backend/vector/ifma/field.rs +++ b/src/backend/vector/ifma/field.rs @@ -14,12 +14,18 @@ use packed_simd::{u64x4, IntoBits}; use backend::serial::u64::field::FieldElement51; -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.x86.avx512.vpmadd52l.uq.256"] - fn madd52lo(z: u64x4, x: u64x4, y: u64x4) -> u64x4; - #[link_name = "llvm.x86.avx512.vpmadd52h.uq.256"] - fn madd52hi(z: u64x4, x: u64x4, y: u64x4) -> u64x4; +/// A wrapper around `vpmadd52luq` that works on `u64x4`. +#[inline(always)] +unsafe fn madd52lo(z: u64x4, x: u64x4, y: u64x4) -> u64x4 { + use core::arch::x86_64::_mm256_madd52lo_epu64; + _mm256_madd52lo_epu64(z.into_bits(), x.into_bits(), y.into_bits()).into_bits() +} + +/// A wrapper around `vpmadd52huq` that works on `u64x4`. +#[inline(always)] +unsafe fn madd52hi(z: u64x4, x: u64x4, y: u64x4) -> u64x4 { + use core::arch::x86_64::_mm256_madd52hi_epu64; + _mm256_madd52hi_epu64(z.into_bits(), x.into_bits(), y.into_bits()).into_bits() } /// A vector of four field elements in radix 2^51, with unreduced coefficients. @@ -203,11 +209,7 @@ use subtle::ConditionallySelectable; impl ConditionallySelectable for F51x4Reduced { #[inline] - fn conditional_select( - a: &F51x4Reduced, - b: &F51x4Reduced, - choice: Choice, - ) -> F51x4Reduced { + fn conditional_select(a: &F51x4Reduced, b: &F51x4Reduced, choice: Choice) -> F51x4Reduced { let mask = (-(choice.unwrap_u8() as i64)) as u64; let mask_vec = u64x4::splat(mask); F51x4Reduced([ diff --git a/src/lib.rs b/src/lib.rs index 07c3befcb..f1ae938d9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,17 +9,11 @@ // - Henry de Valence #![no_std] -#![cfg_attr( - any( - all(feature = "simd_backend", target_feature = "avx512ifma"), - all(feature = "nightly", rustdoc) - ), - feature(simd_ffi, link_llvm_intrinsics) -)] #![cfg_attr(feature = "nightly", feature(test))] #![cfg_attr(all(feature = "alloc", not(feature = "std")), feature(alloc))] #![cfg_attr(feature = "nightly", feature(external_doc))] #![cfg_attr(feature = "nightly", feature(doc_cfg))] +#![cfg_attr(feature = "simd_backend", feature(stdsimd))] // Refuse to compile if documentation is missing, but only on nightly. // // This means that missing docs will still fail CI, but means we can use