Skip to content

Commit

Permalink
Fix incompatibilities with ARM GCC
Browse files (browse the repository at this point in the history)
PiperOrigin-RevId: 327517115
  • Loading branch information
Maratyszcza authored and xnnpack-bot committed Aug 19, 2020
1 parent c2146cc commit 7359463
Show file tree
Hide file tree
Showing 12 changed files with 30 additions and 30 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2168,7 +2168,7 @@ IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]" OR IOS_ARCH MATCHES "^armv7")
SET_PROPERTY(SOURCE ${XNNPACK_NEON_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv7-a -mfpu=neon ")
SET_PROPERTY(SOURCE ${XNNPACK_NEONFMA_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv7-a -mfpu=neon-vfpv4 ")
SET_PROPERTY(SOURCE ${XNNPACK_NEONV8_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv8-a -mfpu=neon-fp-armv8 ")
SET_PROPERTY(SOURCE ${XNNPACK_NEONDOT_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv8.2-a+dotprod ")
SET_PROPERTY(SOURCE ${XNNPACK_NEONDOT_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv8.2-a+dotprod -mfpu=neon-fp-armv8 ")
IF(IOS)
SET_PROPERTY(SOURCE ${XNNPACK_AARCH32_ASM_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -arch ${IOS_ARCH} ")
ENDIF()
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-gavgpool/gen/7p7x-minmax-neon-c16-acc2.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ void xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2(
const int16x8_t vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567)), voutput_zero_point);
const int16x8_t vacc89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc89AB), vqmovn_s32(vaccCDEF)), voutput_zero_point);

int8x16_t vout0123456789ABCDEF = vcombine_s16(vqmovn_s16(vacc01234567), vqmovn_s16(vacc89ABCDEF));
int8x16_t vout0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc01234567), vqmovn_s16(vacc89ABCDEF));
#endif

vout0123456789ABCDEF = vmaxq_s8(vout0123456789ABCDEF, voutput_min);
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-gavgpool/gen/7p7x-minmax-neon-c24-acc2.c
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ void xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2(
const int16x8_t vacc89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc89AB), vqmovn_s32(vaccCDEF)), voutput_zero_point);
const int16x8_t vaccGHIJKLMN = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccGHIJ), vqmovn_s32(vaccKLMN)), voutput_zero_point);

int8x16_t vout0123456789ABCDEF = vcombine_s16(vqmovn_s16(vacc01234567), vqmovn_s16(vacc89ABCDEF));
int8x16_t vout0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc01234567), vqmovn_s16(vacc89ABCDEF));
int8x8_t voutGHIJKLMN = vqmovn_s16(vaccGHIJKLMN);
#endif

Expand Down
4 changes: 2 additions & 2 deletions src/qs8-gavgpool/gen/7p7x-minmax-neon-c32-acc2.c
Original file line number Diff line number Diff line change
Expand Up @@ -515,8 +515,8 @@ void xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2(
const int16x8_t vaccGHIJKLMN = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccGHIJ), vqmovn_s32(vaccKLMN)), voutput_zero_point);
const int16x8_t vaccOPQRSTUV = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccOPQR), vqmovn_s32(vaccSTUV)), voutput_zero_point);

int8x16_t vout0123456789ABCDEF = vcombine_s16(vqmovn_s16(vacc01234567), vqmovn_s16(vacc89ABCDEF));
int8x16_t voutGHIJKLMNOPQRSTUV = vcombine_s16(vqmovn_s16(vaccGHIJKLMN), vqmovn_s16(vaccOPQRSTUV));
int8x16_t vout0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc01234567), vqmovn_s16(vacc89ABCDEF));
int8x16_t voutGHIJKLMNOPQRSTUV = vcombine_s8(vqmovn_s16(vaccGHIJKLMN), vqmovn_s16(vaccOPQRSTUV));
#endif

vout0123456789ABCDEF = vmaxq_s8(vout0123456789ABCDEF, voutput_min);
Expand Down
8 changes: 4 additions & 4 deletions src/qs8-gavgpool/gen/7p7x-minmax-wasmsimd-c16-acc2.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ void xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2(
const int8_t* i6 = (const int8_t*) ((uintptr_t) i5 + input_stride);
const size_t input_increment = 7 * input_stride - round_up_po2(channels, 16);

const v128_t vbias = wasm_v128_load((const v128_t*) params->wasmsimd.bias);
const v128_t vbias = wasm_v128_load(params->wasmsimd.bias);
int32_t* b = buffer;
size_t c = channels;
for (; c != 0; c = doz(c, 16)) {
Expand Down
Expand Up @@ -347,11 +347,11 @@ void xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2(
const v128_t vout0123 = wasm_i32x4_sub(wasm_v128_xor(vabsout0123, vsgnacc0123), vsgnacc0123);
const v128_t vout4567 = wasm_i32x4_sub(wasm_v128_xor(vabsout4567, vsgnacc4567), vsgnacc4567);

const v128_t voutput_zero_point = wasm_v128_load((const v128_t*) params->wasmsimd.output_zero_point);
const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
const v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vout0123, vout4567), voutput_zero_point);

const v128_t voutput_min = wasm_v128_load((const v128_t*) params->wasmsimd.output_min);
const v128_t voutput_max = wasm_v128_load((const v128_t*) params->wasmsimd.output_max);
const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
v128_t vout0123456701234567 = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(vout01234567, vout01234567), voutput_min), voutput_max);

if XNN_LIKELY(channels >= 8) {
Expand Down
22 changes: 11 additions & 11 deletions src/qs8-gavgpool/gen/7p7x-minmax-wasmsimd-c24-acc2.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ void xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2(
const int8_t* i6 = (const int8_t*) ((uintptr_t) i5 + input_stride);
const size_t input_increment = 7 * input_stride - round_up_po2(channels, 8);

const v128_t vbias = wasm_v128_load((const v128_t*) params->wasmsimd.bias);
const v128_t vbias = wasm_v128_load(params->wasmsimd.bias);
int32_t* b = buffer;
size_t c = channels;
for (; c >= 24; c -= 24) {
Expand Down
Expand Up @@ -109,19 +109,19 @@ void xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2(
}
if XNN_UNLIKELY(c != 0) {
do {
const v128_t vxi0x01234567 = wasm_i16x8_load_8x8((const v128_t*) i0);
const v128_t vxi0x01234567 = wasm_i16x8_load_8x8(i0);
i0 += 8;
const v128_t vxi1x01234567 = wasm_i16x8_load_8x8((const v128_t*) i1);
const v128_t vxi1x01234567 = wasm_i16x8_load_8x8(i1);
i1 += 8;
const v128_t vxi2x01234567 = wasm_i16x8_load_8x8((const v128_t*) i2);
const v128_t vxi2x01234567 = wasm_i16x8_load_8x8(i2);
i2 += 8;
const v128_t vxi3x01234567 = wasm_i16x8_load_8x8((const v128_t*) i3);
const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3);
i3 += 8;
const v128_t vxi4x01234567 = wasm_i16x8_load_8x8((const v128_t*) i4);
const v128_t vxi4x01234567 = wasm_i16x8_load_8x8(i4);
i4 += 8;
const v128_t vxi5x01234567 = wasm_i16x8_load_8x8((const v128_t*) i5);
const v128_t vxi5x01234567 = wasm_i16x8_load_8x8(i5);
i5 += 8;
const v128_t vxi6x01234567 = wasm_i16x8_load_8x8((const v128_t*) i6);
const v128_t vxi6x01234567 = wasm_i16x8_load_8x8(i6);
i6 += 8;

v128_t vacc0x01234567 = wasm_i16x8_add(vxi0x01234567, vxi1x01234567);
Expand Down
Expand Up @@ -493,11 +493,11 @@ void xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2(
const v128_t vout0123 = wasm_i32x4_sub(wasm_v128_xor(vabsout0123, vsgnacc0123), vsgnacc0123);
const v128_t vout4567 = wasm_i32x4_sub(wasm_v128_xor(vabsout4567, vsgnacc4567), vsgnacc4567);

const v128_t voutput_zero_point = wasm_v128_load((const v128_t*) params->wasmsimd.output_zero_point);
const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
const v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vout0123, vout4567), voutput_zero_point);

const v128_t voutput_min = wasm_v128_load((const v128_t*) params->wasmsimd.output_min);
const v128_t voutput_max = wasm_v128_load((const v128_t*) params->wasmsimd.output_max);
const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
v128_t vout0123456701234567 = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(vout01234567, vout01234567), voutput_min), voutput_max);

if XNN_LIKELY(channels >= 8) {
Expand Down
8 changes: 4 additions & 4 deletions src/qs8-gavgpool/gen/7p7x-minmax-wasmsimd-c8-acc2.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ void xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2(
const int8_t* i6 = (const int8_t*) ((uintptr_t) i5 + input_stride);
const size_t input_increment = 7 * input_stride - round_up_po2(channels, 8);

const v128_t vbias = wasm_v128_load((const v128_t*) params->wasmsimd.bias);
const v128_t vbias = wasm_v128_load(params->wasmsimd.bias);
int32_t* b = buffer;
size_t c = channels;
for (; c != 0; c = doz(c, 8)) {
Expand Down
Expand Up @@ -277,11 +277,11 @@ void xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2(
const v128_t vout0123 = wasm_i32x4_sub(wasm_v128_xor(vabsout0123, vsgnacc0123), vsgnacc0123);
const v128_t vout4567 = wasm_i32x4_sub(wasm_v128_xor(vabsout4567, vsgnacc4567), vsgnacc4567);

const v128_t voutput_zero_point = wasm_v128_load((const v128_t*) params->wasmsimd.output_zero_point);
const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
const v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vout0123, vout4567), voutput_zero_point);

const v128_t voutput_min = wasm_v128_load((const v128_t*) params->wasmsimd.output_min);
const v128_t voutput_max = wasm_v128_load((const v128_t*) params->wasmsimd.output_max);
const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
v128_t vout0123456701234567 = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(vout01234567, vout01234567), voutput_min), voutput_max);

if (channels & 4) {
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-gavgpool/gen/7x-minmax-neon-c16-acc2.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ void xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2(
const int16x8_t vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567)), voutput_zero_point);
const int16x8_t vacc89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc89AB), vqmovn_s32(vaccCDEF)), voutput_zero_point);

int8x16_t vout0123456789ABCDEF = vcombine_s16(vqmovn_s16(vacc01234567), vqmovn_s16(vacc89ABCDEF));
int8x16_t vout0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc01234567), vqmovn_s16(vacc89ABCDEF));
#endif

vout0123456789ABCDEF = vmaxq_s8(vout0123456789ABCDEF, voutput_min);
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-gavgpool/gen/7x-minmax-neon-c24-acc2.c
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ void xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2(
const int16x8_t vacc89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc89AB), vqmovn_s32(vaccCDEF)), voutput_zero_point);
const int16x8_t vaccGHIJKLMN = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccGHIJ), vqmovn_s32(vaccKLMN)), voutput_zero_point);

int8x16_t vout0123456789ABCDEF = vcombine_s16(vqmovn_s16(vacc01234567), vqmovn_s16(vacc89ABCDEF));
int8x16_t vout0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc01234567), vqmovn_s16(vacc89ABCDEF));
int8x8_t voutGHIJKLMN = vqmovn_s16(vaccGHIJKLMN);
#endif

Expand Down
4 changes: 2 additions & 2 deletions src/qs8-gavgpool/gen/7x-minmax-neon-c32-acc2.c
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,8 @@ void xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2(
const int16x8_t vaccGHIJKLMN = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccGHIJ), vqmovn_s32(vaccKLMN)), voutput_zero_point);
const int16x8_t vaccOPQRSTUV = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccOPQR), vqmovn_s32(vaccSTUV)), voutput_zero_point);

int8x16_t vout0123456789ABCDEF = vcombine_s16(vqmovn_s16(vacc01234567), vqmovn_s16(vacc89ABCDEF));
int8x16_t voutGHIJKLMNOPQRSTUV = vcombine_s16(vqmovn_s16(vaccGHIJKLMN), vqmovn_s16(vaccOPQRSTUV));
int8x16_t vout0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc01234567), vqmovn_s16(vacc89ABCDEF));
int8x16_t voutGHIJKLMNOPQRSTUV = vcombine_s8(vqmovn_s16(vaccGHIJKLMN), vqmovn_s16(vaccOPQRSTUV));
#endif

vout0123456789ABCDEF = vmaxq_s8(vout0123456789ABCDEF, voutput_min);
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-gavgpool/multipass-neon.c.in
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ void xnn_qs8_gavgpool_minmax_ukernel_${ROW_TILE}p${ROW_SUBTILE}x__neon_c${CHANNE

$for C in range(0, CHANNEL_TILE, 16):
$if C + 8 < CHANNEL_TILE:
int8x16_t vout${ABC[C:C+16]} = vcombine_s16(vqmovn_s16(vacc${ABC[C:C+8]}), vqmovn_s16(vacc${ABC[C+8:C+16]}));
int8x16_t vout${ABC[C:C+16]} = vcombine_s8(vqmovn_s16(vacc${ABC[C:C+8]}), vqmovn_s16(vacc${ABC[C+8:C+16]}));
$else:
int8x8_t vout${ABC[C:C+8]} = vqmovn_s16(vacc${ABC[C:C+8]});
#endif
Expand Down
2 changes: 1 addition & 1 deletion src/qs8-gavgpool/unipass-neon.c.in
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ void xnn_qs8_gavgpool_minmax_ukernel_${ROW_TILE}x__neon_c${CHANNEL_TILE}${"" if

$for C in range(0, CHANNEL_TILE, 16):
$if C + 8 < CHANNEL_TILE:
int8x16_t vout${ABC[C:C+16]} = vcombine_s16(vqmovn_s16(vacc${ABC[C:C+8]}), vqmovn_s16(vacc${ABC[C+8:C+16]}));
int8x16_t vout${ABC[C:C+16]} = vcombine_s8(vqmovn_s16(vacc${ABC[C:C+8]}), vqmovn_s16(vacc${ABC[C+8:C+16]}));
$else:
int8x8_t vout${ABC[C:C+8]} = vqmovn_s16(vacc${ABC[C:C+8]});
#endif
Expand Down

0 comments on commit 7359463

Please sign in to comment.