Skip to content

Commit

Permalink
[xstormy16] Recognize/support swpn (swap nibbles) instruction.
Browse files Browse the repository at this point in the history
This patch adds support for xstormy16's swap nibbles instruction (swpn).
For the test case:

short foo(short x) {
  return (x&0xff00) | ((x<<4)&0xf0) | ((x>>4)&0x0f);
}

GCC with -O2 currently generates the following nine-instruction sequence (not counting the final ret):
foo:    mov r7,r2
        asr r2,#4
        and r2,#15
        mov.w r6,#-256
        and r6,r7
        or r2,r6
        shl r7,#4
        and r7,#255
        or r2,r7
        ret

with this patch, we now generate:
foo:	swpn r2
	ret

Achieving this using combine's four-instruction "combinations" requires
a little wizardry.  Firstly, define_insn_and_split patterns are introduced
to treat a logical shift followed by a bitwise-AND as a macro instruction
that is split after reload.  This is sufficient to recognize a QImode
nibble swap, which can be implemented by swpn followed by either a
zero-extension or a sign-extension from QImode to HImode.  Then finally,
in the correct context, a QImode swap-nibbles pattern can be combined to
preserve the high byte of an HImode word, matching the xstormy16's swpn
semantics.  The naming of the new code iterators is taken from i386.md.

2023-04-29  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
	* config/stormy16/stormy16.md (any_lshift): New code iterator.
	(any_or_plus): Likewise.
	(any_rotate): Likewise.
	(*<any_lshift>_and_internal): New define_insn_and_split to
	recognize a logical shift followed by an AND, and split it
	again after reload.
	(*swpn): New define_insn matching xstormy16's swpn.
	(*swpn_zext): New define_insn recognizing swpn followed by
	zero_extendqihi2, i.e. with the high byte set to zero.
	(*swpn_sext): Likewise, for swpn followed by cbw.
	(*swpn_sext_2): Likewise, for an alternate RTL form.
	(*swpn_zext_ior): A pre-reload splitter so that an swpn+zext+ior
	sequence is split in the correct place to recognize the *swpn_zext
	followed by any_or_plus (ior, xor or plus) instruction.

gcc/testsuite/ChangeLog
	* gcc.target/xstormy16/swpn-1.c: New QImode test case.
	* gcc.target/xstormy16/swpn-2.c: New zero_extend test case.
	* gcc.target/xstormy16/swpn-3.c: New sign_extend test case.
	* gcc.target/xstormy16/swpn-4.c: New HImode test case.
  • Loading branch information
rogersayle committed Apr 29, 2023
1 parent 83c78cb commit 58f3cbb
Show file tree
Hide file tree
Showing 5 changed files with 164 additions and 0 deletions.
87 changes: 87 additions & 0 deletions gcc/config/stormy16/stormy16.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@
(CARRY_REG 16)
]
)

;; Code iterator over the two shift codes ashift and lshiftrt.
(define_code_iterator any_lshift [ashift lshiftrt])
;; Code iterator over plus, ior and xor.  When the operands being joined
;; have no set bits in common, all three codes compute the same value.
(define_code_iterator any_or_plus [plus ior xor])
;; Code iterator over the two rotate codes rotate and rotatert.
(define_code_iterator any_rotate [rotate rotatert])

;; ::::::::::::::::::::
;; ::
Expand Down Expand Up @@ -1301,3 +1305,86 @@
[(parallel [(set (match_dup 2) (match_dup 1))
(set (match_dup 1) (match_dup 2))])])

;; Recognize shl+and and shr+and as macro instructions.
;; A shift (ashift or lshiftrt) of an HImode register by a constant in the
;; range 0..15, followed by an AND with a constant, is kept as one insn
;; (output template "#").  Once reload has completed it is split back into
;; the shift, which carries the CARRY_REG clobber, followed by the AND.
;; Operand 1 is given an explicit HImode so the predicate cannot accept a
;; register of another mode, and the split condition starts with "&&" so
;; the splitter also enforces the insn condition on the shift count.
(define_insn_and_split "*<code>_and_internal"
  [(set (match_operand:HI 0 "register_operand" "=r")
        (and:HI (any_lshift:HI (match_operand:HI 1 "register_operand" "0")
                               (match_operand 2 "const_int_operand" "i"))
                (match_operand 3 "const_int_operand" "i")))
   (clobber (reg:BI CARRY_REG))]
  "IN_RANGE (INTVAL (operands[2]), 0, 15)"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 0) (any_lshift:HI (match_dup 1) (match_dup 2)))
              (clobber (reg:BI CARRY_REG))])
   (set (match_dup 0) (and:HI (match_dup 0) (match_dup 3)))])

;; Swap nibbles instruction
;; Matches an HImode value built from three fields of operand 1:
;;   (op1 << 4) & 240   -- bits 0..3 moved up to bits 4..7
;;   (op1 >> 4) & 15    -- bits 4..7 moved down to bits 0..3
;;   op1 & -256         -- the upper byte, unchanged
;; The three masks do not overlap, so joining the fields with plus, ior or
;; xor (any_or_plus) yields the same value.  Operand 1 is tied to operand 0
;; (constraint "0") because the single swpn instruction works in place.
(define_insn "*swpn"
[(set (match_operand:HI 0 "register_operand" "=r")
(any_or_plus:HI
(any_or_plus:HI
(and:HI (ashift:HI (match_operand:HI 1 "register_operand" "0")
(const_int 4))
(const_int 240))
(and:HI (lshiftrt:HI (match_dup 1) (const_int 4))
(const_int 15)))
(and:HI (match_dup 1) (const_int -256))))]
""
"swpn %0")

;; Nibble swap of the low byte with the upper byte cleared.
;; Matches only the two swapped nibble fields, (op1 << 4) & 240 and
;; (op1 >> 4) & 15, joined by plus, ior or xor, so bits 8..15 of the result
;; are zero.  The output template emits swpn followed by an AND with #255;
;; the insn's "length" attribute is set to 6.
(define_insn "*swpn_zext"
[(set (match_operand:HI 0 "register_operand" "=r")
(any_or_plus:HI
(and:HI (ashift:HI (match_operand:HI 1 "register_operand" "0")
(const_int 4))
(const_int 240))
(and:HI (lshiftrt:HI (match_dup 1) (const_int 4))
(const_int 15))))]
""
"swpn %0 | and %0,#255"
[(set_attr "length" "6")])

;; Nibble swap followed by a sign extension from QImode to HImode.
;; Matches sign_extend:HI of a QImode rotate by 4 applied to the QImode
;; subreg (byte offset 0) of operand 1.  The output template emits swpn
;; followed by cbw on the same register; the "length" attribute is set to 4.
(define_insn "*swpn_sext"
[(set (match_operand:HI 0 "register_operand" "=r")
(sign_extend:HI
(rotate:QI (subreg:QI (match_operand:HI 1 "register_operand" "0") 0)
(const_int 4))))]
""
"swpn %0 | cbw %0"
[(set_attr "length" "4")])

;; Alternate RTL form of the sign-extended nibble swap.
;; Here the swap is written as an HImode plus/ior/xor of (op1 << 4) with the
;; HImode subreg of a QImode logical right shift by 4, and the QImode subreg
;; of that result is then sign-extended to HImode.  The emitted code is the
;; same swpn + cbw pair used by *swpn_sext, with "length" set to 4.
(define_insn "*swpn_sext_2"
[(set (match_operand:HI 0 "register_operand" "=r")
(sign_extend:HI
(subreg:QI
(any_or_plus:HI
(ashift:HI (match_operand:HI 1 "register_operand" "0")
(const_int 4))
(subreg:HI (lshiftrt:QI (subreg:QI (match_dup 1) 0)
(const_int 4)) 0)) 0)))]
""
"swpn %0 | cbw %0"
[(set_attr "length" "4")])

;; Recognize swpn_zext+ior as a macro instruction.
;; The pattern is a zero-extended nibble swap of operand 1 that is then
;; combined with operand 2 using plus, ior or xor.  Before reload it is
;; split into two insns: the nibble swap into a fresh pseudo, followed by
;; the combining operation.
;;
;; The inner operation joins two fields whose masks (240 and 15) do not
;; overlap, so plus, ior and xor agree there and the split can always emit
;; ior for it.  The outer operation takes an arbitrary operand 2, for which
;; the three codes give different results, so the split must re-emit the
;; code that was actually matched (<CODE>) rather than a fixed ior.
;; expand_simple_binop is used for that step so the target's own expander
;; supplies whatever extra side effects its pattern requires.
(define_insn_and_split "*swpn_zext_ior"
  [(set (match_operand:HI 0 "register_operand")
        (any_or_plus:HI
          (any_or_plus:HI
            (and:HI (ashift:HI (match_operand:HI 1 "register_operand")
                               (const_int 4))
                    (const_int 240))
            (and:HI (lshiftrt:HI (match_dup 1) (const_int 4))
                    (const_int 15)))
          (match_operand:HI 2 "nonmemory_operand")))]
  "can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* First insn: the zero-extended nibble swap into a new pseudo.  The
     second use of operand 1 is copied because it may not be shareable.  */
  rtx high_nibble
    = gen_rtx_AND (HImode,
                   gen_rtx_ASHIFT (HImode, operands[1], GEN_INT (4)),
                   GEN_INT (240));
  rtx low_nibble
    = gen_rtx_AND (HImode,
                   gen_rtx_LSHIFTRT (HImode, copy_rtx (operands[1]),
                                     GEN_INT (4)),
                   GEN_INT (15));
  rtx swapped = gen_reg_rtx (HImode);
  emit_insn (gen_rtx_SET (swapped,
                          gen_rtx_IOR (HImode, high_nibble, low_nibble)));

  /* Second insn: combine with operand 2 using the matched operation.  */
  rtx result = expand_simple_binop (HImode, <CODE>, swapped, operands[2],
                                    operands[0], 1, OPTAB_DIRECT);
  gcc_assert (result);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})

10 changes: 10 additions & 0 deletions gcc/testsuite/gcc.target/xstormy16/swpn-1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
unsigned char ior_1(unsigned char x) { return (x>>4) | (x<<4); }
unsigned char ior_2(unsigned char x) { return (x<<4) | (x>>4); }
unsigned char xor_1(unsigned char x) { return (x>>4) ^ (x<<4); }
unsigned char xor_2(unsigned char x) { return (x<<4) ^ (x>>4); }
unsigned char sum_1(unsigned char x) { return (x>>4) + (x<<4); }
unsigned char sum_2(unsigned char x) { return (x<<4) + (x>>4); }
/* { dg-final { scan-assembler-times "swpn r2" 6 } } */

14 changes: 14 additions & 0 deletions gcc/testsuite/gcc.target/xstormy16/swpn-2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */

unsigned short ior_1(unsigned short x) { return ((x&0xf0)>>4) | ((x&0x0f)<<4); }
unsigned short xor_1(unsigned short x) { return ((x&0xf0)>>4) ^ ((x&0x0f)<<4); }
unsigned short sum_1(unsigned short x) { return ((x&0xf0)>>4) + ((x&0x0f)<<4); }

unsigned short ior_2(unsigned short x) { return ((x&0x0f)<<4) | ((x&0xf0)>>4); }
unsigned short xor_2(unsigned short x) { return ((x&0x0f)<<4) ^ ((x&0xf0)>>4); }
unsigned short sum_2(unsigned short x) { return ((x&0x0f)<<4) + ((x&0xf0)>>4); }

/* { dg-final { scan-assembler-times "swpn r2" 6 } } */
/* { dg-final { scan-assembler-times "and r2,#255" 6 } } */

28 changes: 28 additions & 0 deletions gcc/testsuite/gcc.target/xstormy16/swpn-3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */

short ior_1(unsigned short x) {
return (signed char)(((x&0xf0)>>4) | ((x&0x0f)<<4));
}

short xor_1(unsigned short x) {
return (signed char)(((x&0xf0)>>4) ^ ((x&0x0f)<<4));
}

short sum_1(unsigned short x) {
return (signed char)(((x&0xf0)>>4) + ((x&0x0f)<<4));
}

short ior_2(unsigned short x) {
return (signed char)(((x&0x0f)<<4) | ((x&0xf0)>>4));
}

short xor_2(unsigned short x) {
return (signed char)(((x&0x0f)<<4) ^ ((x&0xf0)>>4));
}

short sum_2(unsigned short x) {
return (signed char)(((x&0x0f)<<4) + ((x&0xf0)>>4));
}

/* { dg-final { scan-assembler-times "cbw" 6 } } */
25 changes: 25 additions & 0 deletions gcc/testsuite/gcc.target/xstormy16/swpn-4.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */

short ior_abc(short x) { return (x&0xff00) | ((x<<4)&0xf0) | ((x>>4)&0x0f); }
short ior_acb(short x) { return (x&0xff00) | ((x>>4)&0x0f) | ((x<<4)&0xf0); }
short ior_bac(short x) { return ((x<<4)&0xf0) | (x&0xff00) | ((x>>4)&0x0f); }
short ior_bca(short x) { return ((x<<4)&0xf0) | ((x>>4)&0x0f) | (x&0xff00); }
short ior_cab(short x) { return ((x>>4)&0x0f) | (x&0xff00) | ((x<<4)&0xf0); }
short ior_cba(short x) { return ((x>>4)&0x0f) | ((x<<4)&0xf0) | (x&0xff00); }

short xor_abc(short x) { return (x&0xff00) ^ ((x<<4)&0xf0) ^ ((x>>4)&0x0f); }
short xor_acb(short x) { return (x&0xff00) ^ ((x>>4)&0x0f) ^ ((x<<4)&0xf0); }
short xor_bac(short x) { return ((x<<4)&0xf0) ^ (x&0xff00) ^ ((x>>4)&0x0f); }
short xor_bca(short x) { return ((x<<4)&0xf0) ^ ((x>>4)&0x0f) ^ (x&0xff00); }
short xor_cab(short x) { return ((x>>4)&0x0f) ^ (x&0xff00) ^ ((x<<4)&0xf0); }
short xor_cba(short x) { return ((x>>4)&0x0f) ^ ((x<<4)&0xf0) ^ (x&0xff00); }

short sum_abc(short x) { return (x&0xff00) + ((x<<4)&0xf0) + ((x>>4)&0x0f); }
short sum_acb(short x) { return (x&0xff00) + ((x>>4)&0x0f) + ((x<<4)&0xf0); }
short sum_bac(short x) { return ((x<<4)&0xf0) + (x&0xff00) + ((x>>4)&0x0f); }
short sum_bca(short x) { return ((x<<4)&0xf0) + ((x>>4)&0x0f) + (x&0xff00); }
short sum_cab(short x) { return ((x>>4)&0x0f) + (x&0xff00) + ((x<<4)&0xf0); }
short sum_cba(short x) { return ((x>>4)&0x0f) + ((x<<4)&0xf0) + (x&0xff00); }

/* { dg-final { scan-assembler-times "swpn r2" 18 } } */

0 comments on commit 58f3cbb

Please sign in to comment.