Merge pull request #18722 from hrydgard/neon-conversions

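Fix NEON compilation error on strict compilers: the mask and pixel vectors in writeVideoLineRGBA were declared as int32x4_t but are produced and consumed by unsigned (uint32x4_t) intrinsics.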
hrydgard · Jan 18, 2024 · b4122ef
2 parents 7affb1b + 531f145
commit b4122ef
Show file tree

Hide file tree

Showing 3 changed files with 13 additions and 10 deletions.
diff --git a/Common/Math/CrossSIMD.h b/Common/Math/CrossSIMD.h
@@ -10,6 +10,13 @@
 
 #include "stdint.h"
 
+#ifdef __clang__
+// A #pragma directive can't appear inside a macro definition, so use the _Pragma operator instead.
+#define DO_NOT_VECTORIZE_LOOP _Pragma("clang loop vectorize(disable)")
+#else
+#define DO_NOT_VECTORIZE_LOOP
+#endif
+
 #if PPSSPP_ARCH(SSE2)
 #include <emmintrin.h>
 #endif

diff --git a/Core/HW/MediaEngine.cpp b/Core/HW/MediaEngine.cpp
@@ -16,6 +16,7 @@
 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
 
 #include "Common/Serialize/SerializeFuncs.h"
+#include "Common/Math/CrossSIMD.h"
 #include "Core/Config.h"
 #include "Core/Debugger/MemBlockInfo.h"
 #include "Core/HW/MediaEngine.h"
@@ -781,10 +782,10 @@ inline void writeVideoLineRGBA(void *destp, const void *srcp, int width) {
 		count -= 8;
 	}
 #elif PPSSPP_ARCH(ARM_NEON)
-	int32x4_t mask = vdupq_n_u32(0x00FFFFFF);
+	uint32x4_t mask = vdupq_n_u32(0x00FFFFFF);
 	while (count >= 8) {
-		int32x4_t pixels1 = vandq_u32(vld1q_u32(src), mask);
-		int32x4_t pixels2 = vandq_u32(vld1q_u32(src + 4), mask);
+		uint32x4_t pixels1 = vandq_u32(vld1q_u32(src), mask);
+		uint32x4_t pixels2 = vandq_u32(vld1q_u32(src + 4), mask);
 		vst1q_u32(dest, pixels1);
 		vst1q_u32(dest + 4, pixels2);
 		src += 8;
@@ -793,6 +794,7 @@ inline void writeVideoLineRGBA(void *destp, const void *srcp, int width) {
 	}
 #endif
 	const u32 mask32 = 0x00FFFFFF;
+	DO_NOT_VECTORIZE_LOOP
 	while (count--) {
 		*dest++ = *src++ & mask32;
 	}

diff --git a/GPU/Common/TextureDecoder.cpp b/GPU/Common/TextureDecoder.cpp
@@ -23,6 +23,7 @@
 #include "Common/Data/Convert/ColorConv.h"
 #include "Common/CPUDetect.h"
 #include "Common/Log.h"
+#include "Common/Math/CrossSIMD.h"
 
 #include "GPU/GPU.h"
 #include "GPU/GPUState.h"
@@ -41,13 +42,6 @@
 #endif
 #endif
 
-#ifdef __clang__
-// Weird how you can't just use #pragma in a macro.
-#define DO_NOT_VECTORIZE_LOOP _Pragma("clang loop vectorize(disable)")
-#else
-#define DO_NOT_VECTORIZE_LOOP
-#endif
-
 const u8 textureBitsPerPixel[16] = {
 	16,  //GE_TFMT_5650,
 	16,  //GE_TFMT_5551,