Skip to content

Commit

Permalink
Add SSE4a implementation of NT-store accessor
Browse files (browse the repository at this point in the history)
  • Loading branch information
bernhardmgruber committed Aug 18, 2023
1 parent bdc3853 commit 32a158b
Showing 1 changed file with 14 additions and 1 deletion.
15 changes: 14 additions & 1 deletion examples/stream/stream.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -594,7 +594,12 @@ void checkSTREAMresults()

# undef abs
# undef M
# include <emmintrin.h>
# ifdef _MSC_VER
# include <intrin.h>
# else
# include <ammintrin.h>
# include <emmintrin.h>
# endif
# include <llama/llama.hpp>

// Compile-time LLAMA array-of-structs (AoS) mapping: the array extents use ssize_t as the index
// type with a static extent of STREAM_ARRAY_SIZE, and STREAM_TYPE is the element (record) type.
constexpr auto mapping = llama::mapping::AoS<llama::ArrayExtents<ssize_t, STREAM_ARRAY_SIZE>, STREAM_TYPE>{};
Expand All @@ -613,15 +618,23 @@ struct NonTemporalStoreAccessor
# else
if constexpr(sizeof(T) == sizeof(long long))
{
# ifdef __SSE4A__
_mm_stream_sd(&ref, _mm_set_sd(t));
# else
long long i;
std::memcpy(&i, &t, sizeof(i));
_mm_stream_si64(reinterpret_cast<long long*>(&ref), i);
# endif
}
else if constexpr(sizeof(T) == sizeof(int))
{
# ifdef __SSE4A__
_mm_stream_ss(&ref, _mm_set_ss(t));
# else
int i;
std::memcpy(&i, &t, sizeof(i));
_mm_stream_si32(reinterpret_cast<int*>(&ref), i);
# endif
}
else
{
Expand Down

0 comments on commit 32a158b

Please sign in to comment.