-
Notifications
You must be signed in to change notification settings - Fork 32
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[xstormy16] Recognize/support swpn (swap nibbles) instruction.
This patch adds support for xstormy16's swap nibbles instruction (swpn). For the test case: short foo(short x) { return (x&0xff00) | ((x<<4)&0xf0) | ((x>>4)&0x0f); } GCC with -O2 currently generates the nine instruction sequence: foo: mov r7,r2 asr r2,#4 and r2,#15 mov.w r6,#-256 and r6,r7 or r2,r6 shl r7,#4 and r7,#255 or r2,r7 ret with this patch, we now generate: foo: swpn r2 ret To achieve this using combine's four instruction "combinations" requires a little wizardry. Firstly, define_insn_and_split are introduced to treat logical shifts followed by bitwise-AND as macro instructions that are split after reload. This is sufficient to recognize a QImode nibble swap, which can be implemented by swpn followed by either a zero-extension or a sign-extension from QImode to HImode. Then finally, in the correct context, a QImode swap-nibbles pattern can be combined to preserve the high-byte of a HImode word, matching the xstormy16's swpn semantics. The naming of the new code iterators is taken from i386.md. 2023-04-29 Roger Sayle <roger@nextmovesoftware.com> gcc/ChangeLog * config/stormy16/stormy16.md (any_lshift): New code iterator. (any_or_plus): Likewise. (any_rotate): Likewise. (*<any_lshift>_and_internal): New define_insn_and_split to recognize a logical shift followed by an AND, and split it again after reload. (*swpn): New define_insn matching xstormy16's swpn. (*swpn_zext): New define_insn recognizing swpn followed by zero_extendqihi2, i.e. with the high byte set to zero. (*swpn_sext): Likewise, for swpn followed by cbw. (*swpn_sext_2): Likewise, for an alternate RTL form. (*swpn_zext_ior): A pre-reload splitter so that an swpn+zext+ior sequence is split in the correct place to recognize the *swpn_zext followed by any_or_plus (ior, xor or plus) instruction. gcc/testsuite/ChangeLog * gcc.target/xstormy16/swpn-1.c: New QImode test case. * gcc.target/xstormy16/swpn-2.c: New zero_extend test case. * gcc.target/xstormy16/swpn-3.c: New sign_extend test case. * gcc.target/xstormy16/swpn-4.c: New HImode test case.
- Loading branch information
1 parent
83c78cb
commit 58f3cbb
Showing
5 changed files
with
164 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
/* { dg-do compile } */ | ||
/* { dg-options "-O2" } */ | ||
unsigned char ior_1(unsigned char x) { return (x>>4) | (x<<4); } | ||
unsigned char ior_2(unsigned char x) { return (x<<4) | (x>>4); } | ||
unsigned char xor_1(unsigned char x) { return (x>>4) ^ (x<<4); } | ||
unsigned char xor_2(unsigned char x) { return (x<<4) ^ (x>>4); } | ||
unsigned char sum_1(unsigned char x) { return (x>>4) + (x<<4); } | ||
unsigned char sum_2(unsigned char x) { return (x<<4) + (x>>4); } | ||
/* { dg-final { scan-assembler-times "swpn r2" 6 } } */ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
/* { dg-do compile } */ | ||
/* { dg-options "-O2" } */ | ||
|
||
unsigned short ior_1(unsigned short x) { return ((x&0xf0)>>4) | ((x&0x0f)<<4); } | ||
unsigned short xor_1(unsigned short x) { return ((x&0xf0)>>4) ^ ((x&0x0f)<<4); } | ||
unsigned short sum_1(unsigned short x) { return ((x&0xf0)>>4) + ((x&0x0f)<<4); } | ||
|
||
unsigned short ior_2(unsigned short x) { return ((x&0x0f)<<4) | ((x&0xf0)>>4); } | ||
unsigned short xor_2(unsigned short x) { return ((x&0x0f)<<4) ^ ((x&0xf0)>>4); } | ||
unsigned short sum_2(unsigned short x) { return ((x&0x0f)<<4) + ((x&0xf0)>>4); } | ||
|
||
/* { dg-final { scan-assembler-times "swpn r2" 6 } } */ | ||
/* { dg-final { scan-assembler-times "and r2,#255" 6 } } */ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
/* { dg-do compile } */ | ||
/* { dg-options "-O2" } */ | ||
|
||
short ior_1(unsigned short x) { | ||
return (signed char)(((x&0xf0)>>4) | ((x&0x0f)<<4)); | ||
} | ||
|
||
short xor_1(unsigned short x) { | ||
return (signed char)(((x&0xf0)>>4) ^ ((x&0x0f)<<4)); | ||
} | ||
|
||
short sum_1(unsigned short x) { | ||
return (signed char)(((x&0xf0)>>4) + ((x&0x0f)<<4)); | ||
} | ||
|
||
short ior_2(unsigned short x) { | ||
return (signed char)(((x&0x0f)<<4) | ((x&0xf0)>>4)); | ||
} | ||
|
||
short xor_2(unsigned short x) { | ||
return (signed char)(((x&0x0f)<<4) ^ ((x&0xf0)>>4)); | ||
} | ||
|
||
short sum_2(unsigned short x) { | ||
return (signed char)(((x&0x0f)<<4) + ((x&0xf0)>>4)); | ||
} | ||
|
||
/* { dg-final { scan-assembler-times "cbw" 6 } } */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
/* { dg-do compile } */ | ||
/* { dg-options "-O2" } */ | ||
|
||
short ior_abc(short x) { return (x&0xff00) | ((x<<4)&0xf0) | ((x>>4)&0x0f); } | ||
short ior_acb(short x) { return (x&0xff00) | ((x>>4)&0x0f) | ((x<<4)&0xf0); } | ||
short ior_bac(short x) { return ((x<<4)&0xf0) | (x&0xff00) | ((x>>4)&0x0f); } | ||
short ior_bca(short x) { return ((x<<4)&0xf0) | ((x>>4)&0x0f) | (x&0xff00); } | ||
short ior_cab(short x) { return ((x>>4)&0x0f) | (x&0xff00) | ((x<<4)&0xf0); } | ||
short ior_cba(short x) { return ((x>>4)&0x0f) | ((x<<4)&0xf0) | (x&0xff00); } | ||
|
||
short xor_abc(short x) { return (x&0xff00) ^ ((x<<4)&0xf0) ^ ((x>>4)&0x0f); } | ||
short xor_acb(short x) { return (x&0xff00) ^ ((x>>4)&0x0f) ^ ((x<<4)&0xf0); } | ||
short xor_bac(short x) { return ((x<<4)&0xf0) ^ (x&0xff00) ^ ((x>>4)&0x0f); } | ||
short xor_bca(short x) { return ((x<<4)&0xf0) ^ ((x>>4)&0x0f) ^ (x&0xff00); } | ||
short xor_cab(short x) { return ((x>>4)&0x0f) ^ (x&0xff00) ^ ((x<<4)&0xf0); } | ||
short xor_cba(short x) { return ((x>>4)&0x0f) ^ ((x<<4)&0xf0) ^ (x&0xff00); } | ||
|
||
short sum_abc(short x) { return (x&0xff00) + ((x<<4)&0xf0) + ((x>>4)&0x0f); } | ||
short sum_acb(short x) { return (x&0xff00) + ((x>>4)&0x0f) + ((x<<4)&0xf0); } | ||
short sum_bac(short x) { return ((x<<4)&0xf0) + (x&0xff00) + ((x>>4)&0x0f); } | ||
short sum_bca(short x) { return ((x<<4)&0xf0) + ((x>>4)&0x0f) + (x&0xff00); } | ||
short sum_cab(short x) { return ((x>>4)&0x0f) + (x&0xff00) + ((x<<4)&0xf0); } | ||
short sum_cba(short x) { return ((x>>4)&0x0f) + ((x<<4)&0xf0) + (x&0xff00); } | ||
|
||
/* { dg-final { scan-assembler-times "swpn r2" 18 } } */ |