Skip to content

Commit

Permalink
Merge pull request #18722 from hrydgard/neon-conversions
Browse files Browse the repository at this point in the history
Fix NEON compilation error on strict compilers.
  • Loading branch information
hrydgard committed Jan 18, 2024
2 parents 7affb1b + 531f145 commit b4122ef
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 10 deletions.
7 changes: 7 additions & 0 deletions Common/Math/CrossSIMD.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@

#include "stdint.h"

// DO_NOT_VECTORIZE_LOOP: put this on the line directly before a loop to ask
// clang not to auto-vectorize that loop. When __clang__ is not defined the
// macro expands to nothing, so it is safe to use unconditionally.
#ifdef __clang__
// A #pragma directive can't be produced by a macro expansion, so the
// _Pragma("...") operator form is used here instead.
#define DO_NOT_VECTORIZE_LOOP _Pragma("clang loop vectorize(disable)")
#else
#define DO_NOT_VECTORIZE_LOOP
#endif

#if PPSSPP_ARCH(SSE2)
#include <emmintrin.h>
#endif
Expand Down
8 changes: 5 additions & 3 deletions Core/HW/MediaEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "Common/Serialize/SerializeFuncs.h"
#include "Common/Math/CrossSIMD.h"
#include "Core/Config.h"
#include "Core/Debugger/MemBlockInfo.h"
#include "Core/HW/MediaEngine.h"
Expand Down Expand Up @@ -781,10 +782,10 @@ inline void writeVideoLineRGBA(void *destp, const void *srcp, int width) {
count -= 8;
}
#elif PPSSPP_ARCH(ARM_NEON)
int32x4_t mask = vdupq_n_u32(0x00FFFFFF);
uint32x4_t mask = vdupq_n_u32(0x00FFFFFF);
while (count >= 8) {
int32x4_t pixels1 = vandq_u32(vld1q_u32(src), mask);
int32x4_t pixels2 = vandq_u32(vld1q_u32(src + 4), mask);
uint32x4_t pixels1 = vandq_u32(vld1q_u32(src), mask);
uint32x4_t pixels2 = vandq_u32(vld1q_u32(src + 4), mask);
vst1q_u32(dest, pixels1);
vst1q_u32(dest + 4, pixels2);
src += 8;
Expand All @@ -793,6 +794,7 @@ inline void writeVideoLineRGBA(void *destp, const void *srcp, int width) {
}
#endif
const u32 mask32 = 0x00FFFFFF;
DO_NOT_VECTORIZE_LOOP
while (count--) {
*dest++ = *src++ & mask32;
}
Expand Down
8 changes: 1 addition & 7 deletions GPU/Common/TextureDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "Common/Data/Convert/ColorConv.h"
#include "Common/CPUDetect.h"
#include "Common/Log.h"
#include "Common/Math/CrossSIMD.h"

#include "GPU/GPU.h"
#include "GPU/GPUState.h"
Expand All @@ -41,13 +42,6 @@
#endif
#endif

// Loop annotation: DO_NOT_VECTORIZE_LOOP, written immediately before a loop,
// disables clang's loop vectorization for it. Without __clang__ it is empty.
#ifdef __clang__
// #pragma can't be emitted from a macro body; the _Pragma operator can.
#define DO_NOT_VECTORIZE_LOOP _Pragma("clang loop vectorize(disable)")
#else
#define DO_NOT_VECTORIZE_LOOP
#endif

const u8 textureBitsPerPixel[16] = {
16, //GE_TFMT_5650,
16, //GE_TFMT_5551,
Expand Down

0 comments on commit b4122ef

Please sign in to comment.