From 6d229217598b4fe19700e28e5250d657d0e7dfd6 Mon Sep 17 00:00:00 2001 From: Connor McMonigle Date: Sun, 24 Apr 2022 19:32:17 -0700 Subject: [PATCH] rely on FMA contraction, enabling AVX support bench: 4208908 --- include/simd.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/simd.h b/include/simd.h index 587652d..8004e5e 100644 --- a/include/simd.h +++ b/include/simd.h @@ -146,25 +146,25 @@ struct dot_product_32_type { { const __m256 a_0 = _mm256_load_ps(a + 0 * per_unit); const __m256 b_0 = _mm256_load_ps(b + 0 * per_unit); - sum_0 = _mm256_fmadd_ps(a_0, b_0, sum_0); + sum_0 = _mm256_add_ps(_mm256_mul_ps(a_0, b_0), sum_0); } { const __m256 a_1 = _mm256_load_ps(a + 1 * per_unit); const __m256 b_1 = _mm256_load_ps(b + 1 * per_unit); - sum_1 = _mm256_fmadd_ps(a_1, b_1, sum_1); + sum_1 = _mm256_add_ps(_mm256_mul_ps(a_1, b_1), sum_1); } { const __m256 a_2 = _mm256_load_ps(a + 2 * per_unit); const __m256 b_2 = _mm256_load_ps(b + 2 * per_unit); - sum_2 = _mm256_fmadd_ps(a_2, b_2, sum_2); + sum_2 = _mm256_add_ps(_mm256_mul_ps(a_2, b_2), sum_2); } { const __m256 a_3 = _mm256_load_ps(a + 3 * per_unit); const __m256 b_3 = _mm256_load_ps(b + 3 * per_unit); - sum_3 = _mm256_fmadd_ps(a_3, b_3, sum_3); + sum_3 = _mm256_add_ps(_mm256_mul_ps(a_3, b_3), sum_3); } }