Skip to content

Commit

Permalink
Add c[lt]z idiom recognition
Browse files Browse the repository at this point in the history
This recognises the patterns of the form:
  while (!(n & 1)) { n >>= 1; }

Unfortunately there are currently two issues relating to this patch.

Firstly, simplify_using_initial_conditions does not recognise that
	(n != 0) and ((n & 1) == 0) implies that ((n >> 1) != 0).

These preconditions arise following the loop copy-header pass, and the
assumptions returned by number_of_iterations_exit_assumptions then
prevent final value replacement from using the niter result.

I'm not sure what is the best way to fix this - one approach could be to
modify simplify_using_initial_conditions to handle this sort of case,
but it seems that it basically wants the information that ranger could
give anyway, so would something like that be a better option?

The second issue arises in the vectoriser, which is able to determine
that the niter->assumptions are always true.
When building the function below with -march=armv8.4-a+sve -S -O3, we get
the following codegen:

foo (unsigned int b) {
    int c = 0;

    if (b == 0)
      return PREC;

    while (!(b & (1 << (PREC - 1)))) {
        b <<= 1;
        c++;
    }

    return c;
}

foo:
.LFB0:
        .cfi_startproc
        cmp     w0, 0
        cbz     w0, .L6
        blt     .L7
        lsl     w1, w0, 1
        clz     w2, w1
        cmp     w2, 14
        bls     .L8
        mov     x0, 0
        cntw    x3
        add     w1, w2, 1
        index   z1.s, #0, #1
        whilelo p0.s, wzr, w1
.L4:
        add     x0, x0, x3
        mov     p1.b, p0.b
        mov     z0.d, z1.d
        whilelo p0.s, w0, w1
        incw    z1.s
        b.any   .L4
        add     z0.s, z0.s, #1
        lastb   w0, p1, z0.s
        ret
        .p2align 2,,3
.L8:
        mov     w0, 0
        b       .L3
        .p2align 2,,3
.L13:
        lsl     w1, w1, 1
.L3:
        add     w0, w0, 1
        tbz     w1, #31, .L13
        ret
        .p2align 2,,3
.L6:
        mov     w0, 32
        ret
        .p2align 2,,3
.L7:
        mov     w0, 0
        ret
        .cfi_endproc

In essence, the vectoriser uses the niter information to determine
exactly how many iterations of the loop it needs to run. It then uses
SVE whilelo instructions to run this number of iterations. The original
loop counter is also vectorised, despite only being used in the final
iteration, and then the final value of this counter is used as the
return value (which is the same as the number of iterations it computed
in the first place).

This vectorisation is obviously bad, and I think it exposes a latent
bug in the vectoriser, rather than being an issue caused by this
specific patch.

gcc/ChangeLog:

	* tree-ssa-loop-niter.cc (number_of_iterations_cltz): New.
	(number_of_iterations_bitcount): Add call to the above.
	(number_of_iterations_exit_assumptions): Add EQ_EXPR case for
	c[lt]z idiom recognition.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/cltz-max.c: New test.
	* gcc.dg/tree-ssa/clz-char.c: New test.
	* gcc.dg/tree-ssa/clz-int.c: New test.
	* gcc.dg/tree-ssa/clz-long-long.c: New test.
	* gcc.dg/tree-ssa/clz-long.c: New test.
	* gcc.dg/tree-ssa/ctz-char.c: New test.
	* gcc.dg/tree-ssa/ctz-int.c: New test.
	* gcc.dg/tree-ssa/ctz-long-long.c: New test.
	* gcc.dg/tree-ssa/ctz-long.c: New test.
  • Loading branch information
andrewcarlotti committed Jan 16, 2023
1 parent 0419b9b commit 4798080
Show file tree
Hide file tree
Showing 10 changed files with 517 additions and 0 deletions.
72 changes: 72 additions & 0 deletions gcc/testsuite/gcc.dg/tree-ssa/cltz-max.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fno-tree-loop-optimize -fdump-tree-optimized" } */

#define PREC (__CHAR_BIT__)

/* Count how many left shifts are needed before the top bit (bit PREC - 1)
   of B is set, then compare that count against PREC - 1.  B == 0 returns 0
   up front, so the loop is only entered for nonzero B.  The dg-final
   directives of this test expect the constant 34567 to be absent from the
   optimized dump, i.e. the else branch is expected to be removed.  */
int clz_count1 (unsigned char b) {
int c = 0;

if (b == 0)
return 0;

/* Shift left until the most significant bit is set, counting the shifts.  */
while (!(b & (1 << (PREC - 1)))) {
b <<= 1;
c++;
}
if (c <= PREC - 1)
return 0;
else
return 34567;
}

/* Same loop as a leading-zero count on a PREC-bit value, but the result is
   compared against PREC - 2.  For B == 1 the loop needs PREC - 1 shifts, so
   the else branch is reachable; the dg-final directives of this test expect
   the constant 76543 to appear exactly once in the optimized dump.  */
int clz_count2 (unsigned char b) {
  int c = 0;

  if (b == 0)
    return 0;

  /* The mask is parenthesised explicitly: '-' binds tighter than '<<', so
     the value is unchanged, but the intent no longer depends on precedence
     (and -Wparentheses stays quiet).  */
  while (!(b & (1 << (PREC - 1)))) {
    b <<= 1;
    c++;
  }
  if (c <= PREC - 2)
    return 0;
  else
    return 76543;
}

/* Count how many right shifts are needed before bit 0 of B is set, then
   compare that count against PREC - 1.  B == 0 returns 0 up front, so the
   loop is only entered for nonzero B.  The dg-final directives of this test
   expect the constant 23456 to be absent from the optimized dump, i.e. the
   else branch is expected to be removed.  */
int ctz_count1 (unsigned char b) {
int c = 0;

if (b == 0)
return 0;

/* Shift right until the least significant bit is set, counting the shifts.  */
while (!(b & 1)) {
b >>= 1;
c++;
}
if (c <= PREC - 1)
return 0;
else
return 23456;
}

/* Same trailing-zero counting loop as above, but the result is compared
   against PREC - 2.  When only the top bit of B is set the loop needs
   PREC - 1 shifts, so the else branch is reachable; the dg-final directives
   of this test expect the constant 65432 to appear exactly once in the
   optimized dump.  */
int ctz_count2 (unsigned char b) {
int c = 0;

if (b == 0)
return 0;

/* Shift right until the least significant bit is set, counting the shifts.  */
while (!(b & 1)) {
b >>= 1;
c++;
}
if (c <= PREC - 2)
return 0;
else
return 65432;
}
/* { dg-final { scan-tree-dump-times "34567" 0 "optimized" } } */
/* { dg-final { scan-tree-dump-times "76543" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "23456" 0 "optimized" } } */
/* { dg-final { scan-tree-dump-times "65432" 1 "optimized" } } */
34 changes: 34 additions & 0 deletions gcc/testsuite/gcc.dg/tree-ssa/clz-char.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/* { dg-do run } */
/* { dg-require-effective-target clzl } */
/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */

#define PREC (__CHAR_BIT__)

/* Count the leading zero bits of B, treated as a PREC-bit value, by
   shifting left until bit PREC - 1 is set.  Returns PREC when B is zero.
   The dg-final directive of this test expects exactly one
   __builtin_clz / .CLZ in the optimized dump.
   NOTE(review): noinline/noclone presumably keep the calls in main from
   being folded away -- confirm.  */
int
__attribute__ ((noinline, noclone))
foo (unsigned char b) {
int c = 0;

/* Zero has no set bit; handle it before the loop, which would otherwise
   never see the top bit set.  */
if (b == 0)
return PREC;

/* 1 << (PREC - 1) is evaluated in int and selects the top bit of B.  */
while (!(b & (1 << (PREC - 1)))) {
b <<= 1;
c++;
}

return c;
}

/* Runtime checks for foo: zero input, only the top bit set, and 35 (whose
   highest set bit is bit 5, leaving PREC - 6 leading zeros).  Aborts on the
   first mismatch.  */
int main()
{
  /* Short-circuit evaluation keeps the calls in their original order and
     stops at the first failing check.  */
  if (foo(0) != PREC
      || foo(1 << (PREC - 1)) != 0
      || foo(35) != PREC - 6)
    __builtin_abort ();

  return 0;
}

/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 1 "optimized" } } */
34 changes: 34 additions & 0 deletions gcc/testsuite/gcc.dg/tree-ssa/clz-int.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/* { dg-do run } */
/* { dg-require-effective-target clzl } */
/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */

#define PREC (__CHAR_BIT__ * __SIZEOF_INT__)

/* Count the leading zero bits of B by shifting left until bit PREC - 1 is
   set.  Returns PREC when B is zero.  The dg-final directive of this test
   expects exactly one __builtin_clz / .CLZ in the optimized dump.
   NOTE(review): noinline/noclone presumably keep the calls in main from
   being folded away -- confirm.  */
int
__attribute__ ((noinline, noclone))
foo (unsigned int b) {
  int c = 0;

  /* Zero has no set bit; handle it before the loop, which would otherwise
     never see the top bit set.  */
  if (b == 0)
    return PREC;

  /* Build the mask from an unsigned 1: shifting a signed 1 into the sign
     bit is undefined in ISO C.  The mask value is unchanged.  */
  while (!(b & (1u << (PREC - 1)))) {
    b <<= 1;
    c++;
  }

  return c;
}

/* Runtime checks for foo: zero input, only the top bit set, and 35 (whose
   highest set bit is bit 5, leaving PREC - 6 leading zeros).  Aborts on the
   first mismatch.  */
int main()
{
  if (foo(0) != PREC)
    __builtin_abort ();
  /* 1u rather than 1: shifting a signed 1 into the sign bit is undefined
     in ISO C.  */
  if (foo(1u << (PREC - 1)) != 0)
    __builtin_abort ();
  if (foo(35) != PREC - 6)
    __builtin_abort ();
  return 0;
}

/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 1 "optimized" } } */
34 changes: 34 additions & 0 deletions gcc/testsuite/gcc.dg/tree-ssa/clz-long-long.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/* { dg-do run } */
/* { dg-require-effective-target clzll } */
/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */

#define PREC (__CHAR_BIT__ * __SIZEOF_LONG_LONG__)

/* Count the leading zero bits of B by shifting left until bit PREC - 1 is
   set.  Returns PREC when B is zero.  The dg-final directive of this test
   expects exactly one __builtin_clz / .CLZ in the optimized dump.
   NOTE(review): noinline/noclone presumably keep the calls in main from
   being folded away -- confirm.  */
int
__attribute__ ((noinline, noclone))
foo (unsigned long long b) {
  int c = 0;

  /* Zero has no set bit; handle it before the loop, which would otherwise
     never see the top bit set.  */
  if (b == 0)
    return PREC;

  /* Build the mask from 1ULL: shifting a signed 1LL into the sign bit is
     undefined in ISO C.  The mask value is unchanged.  */
  while (!(b & (1ULL << (PREC - 1)))) {
    b <<= 1;
    c++;
  }

  return c;
}

/* Runtime checks for foo: zero input, only the top bit set, and 35 (whose
   highest set bit is bit 5, leaving PREC - 6 leading zeros).  Aborts on the
   first mismatch.  */
int main()
{
  if (foo(0) != PREC)
    __builtin_abort ();
  /* 1ULL rather than 1LL: shifting a signed 1 into the sign bit is
     undefined in ISO C.  */
  if (foo(1ULL << (PREC - 1)) != 0)
    __builtin_abort ();
  if (foo(35) != PREC - 6)
    __builtin_abort ();
  return 0;
}

/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 1 "optimized" } } */
34 changes: 34 additions & 0 deletions gcc/testsuite/gcc.dg/tree-ssa/clz-long.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/* { dg-do run } */
/* { dg-require-effective-target clzl } */
/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */

#define PREC (__CHAR_BIT__ * __SIZEOF_LONG__)

/* Count the leading zero bits of B by shifting left until bit PREC - 1 is
   set.  Returns PREC when B is zero.  The dg-final directive of this test
   expects exactly one __builtin_clz / .CLZ in the optimized dump.
   NOTE(review): noinline/noclone presumably keep the calls in main from
   being folded away -- confirm.  */
int
__attribute__ ((noinline, noclone))
foo (unsigned long b) {
  int c = 0;

  /* Zero has no set bit; handle it before the loop, which would otherwise
     never see the top bit set.  */
  if (b == 0)
    return PREC;

  /* Build the mask from 1UL: shifting a signed 1L into the sign bit is
     undefined in ISO C.  The mask value is unchanged.  */
  while (!(b & (1UL << (PREC - 1)))) {
    b <<= 1;
    c++;
  }

  return c;
}

/* Runtime checks for foo: zero input, only the top bit set, and 35 (whose
   highest set bit is bit 5, leaving PREC - 6 leading zeros).  Aborts on the
   first mismatch.  */
int main()
{
  if (foo(0) != PREC)
    __builtin_abort ();
  /* 1UL rather than 1L: shifting a signed 1 into the sign bit is undefined
     in ISO C.  */
  if (foo(1UL << (PREC - 1)) != 0)
    __builtin_abort ();
  if (foo(35) != PREC - 6)
    __builtin_abort ();
  return 0;
}

/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 1 "optimized" } } */
36 changes: 36 additions & 0 deletions gcc/testsuite/gcc.dg/tree-ssa/ctz-char.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/* { dg-do run } */
/* { dg-require-effective-target ctz } */
/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */

#define PREC (__CHAR_BIT__)

/* Count the trailing zero bits of B by shifting right until bit 0 is set.
   Returns PREC when B is zero.  The dg-final directive of this test expects
   exactly one __builtin_ctz / .CTZ in the optimized dump.
   NOTE(review): noinline/noclone presumably keep the calls in main from
   being folded away -- confirm.  */
int
__attribute__ ((noinline, noclone))
foo (unsigned char b) {
int c = 0;

/* Zero has no set bit; handle it before the loop, which would otherwise
   never see bit 0 set.  */
if (b == 0)
return PREC;

/* Each iteration drops one trailing zero bit and counts it.  */
while (!(b & 1)) {
b >>= 1;
c++;
}

return c;
}

/* Runtime checks for foo: zero input, 128 (only bit 7 set), 96 (lowest set
   bit is bit 5) and 35 (odd, so no trailing zeros).  Aborts on the first
   mismatch.  */
int main()
{
  /* Short-circuit evaluation keeps the calls in their original order and
     stops at the first failing check.  */
  if (foo(0) != PREC
      || foo(128) != 7
      || foo(96) != 5
      || foo(35) != 0)
    __builtin_abort ();

  return 0;
}

/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } } */
36 changes: 36 additions & 0 deletions gcc/testsuite/gcc.dg/tree-ssa/ctz-int.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/* { dg-do run } */
/* { dg-require-effective-target ctz } */
/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */

#define PREC (__CHAR_BIT__ * __SIZEOF_INT__)

/* Count the trailing zero bits of B by shifting right until bit 0 is set.
   Returns PREC when B is zero.  The dg-final directive of this test expects
   exactly one __builtin_ctz / .CTZ in the optimized dump.
   NOTE(review): noinline/noclone presumably keep the calls in main from
   being folded away -- confirm.  */
int
__attribute__ ((noinline, noclone))
foo (unsigned int b) {
int c = 0;

/* Zero has no set bit; handle it before the loop, which would otherwise
   never see bit 0 set.  */
if (b == 0)
return PREC;

/* Each iteration drops one trailing zero bit and counts it.  */
while (!(b & 1)) {
b >>= 1;
c++;
}

return c;
}

/* Runtime checks for foo: zero input, only the top bit set, 96 (lowest set
   bit is bit 5) and 35 (odd, so no trailing zeros).  Aborts on the first
   mismatch.  */
int main()
{
  if (foo(0) != PREC)
    __builtin_abort ();
  /* 1u rather than 1: shifting a signed 1 into the sign bit is undefined
     in ISO C.  */
  if (foo(1u << (PREC - 1)) != PREC - 1)
    __builtin_abort ();
  if (foo(96) != 5)
    __builtin_abort ();
  if (foo(35) != 0)
    __builtin_abort ();
  return 0;
}

/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } } */
36 changes: 36 additions & 0 deletions gcc/testsuite/gcc.dg/tree-ssa/ctz-long-long.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/* { dg-do run } */
/* { dg-require-effective-target ctzll } */
/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */

#define PREC (__CHAR_BIT__ * __SIZEOF_LONG_LONG__)

/* Count the trailing zero bits of B by shifting right until bit 0 is set.
   Returns PREC when B is zero.  The dg-final directive of this test expects
   exactly one __builtin_ctz / .CTZ in the optimized dump.
   NOTE(review): noinline/noclone presumably keep the calls in main from
   being folded away -- confirm.  */
int
__attribute__ ((noinline, noclone))
foo (unsigned long long b) {
int c = 0;

/* Zero has no set bit; handle it before the loop, which would otherwise
   never see bit 0 set.  */
if (b == 0)
return PREC;

/* Each iteration drops one trailing zero bit and counts it.  */
while (!(b & 1)) {
b >>= 1;
c++;
}

return c;
}

/* Runtime checks for foo: zero input, only the top bit set, 96 (lowest set
   bit is bit 5) and 35 (odd, so no trailing zeros).  Aborts on the first
   mismatch.  */
int main()
{
  if (foo(0) != PREC)
    __builtin_abort ();
  /* 1ULL rather than 1LL: shifting a signed 1 into the sign bit is
     undefined in ISO C.  */
  if (foo(1ULL << (PREC - 1)) != PREC - 1)
    __builtin_abort ();
  if (foo(96) != 5)
    __builtin_abort ();
  if (foo(35) != 0)
    __builtin_abort ();
  return 0;
}

/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } } */
36 changes: 36 additions & 0 deletions gcc/testsuite/gcc.dg/tree-ssa/ctz-long.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/* { dg-do run } */
/* { dg-require-effective-target ctzl } */
/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */

#define PREC (__CHAR_BIT__ * __SIZEOF_LONG__)

/* Count the trailing zero bits of B by shifting right until bit 0 is set.
   Returns PREC when B is zero.  The dg-final directive of this test expects
   exactly one __builtin_ctz / .CTZ in the optimized dump.
   NOTE(review): noinline/noclone presumably keep the calls in main from
   being folded away -- confirm.  */
int
__attribute__ ((noinline, noclone))
foo (unsigned long b) {
int c = 0;

/* Zero has no set bit; handle it before the loop, which would otherwise
   never see bit 0 set.  */
if (b == 0)
return PREC;

/* Each iteration drops one trailing zero bit and counts it.  */
while (!(b & 1)) {
b >>= 1;
c++;
}

return c;
}

/* Runtime checks for foo: zero input, only the top bit set, 96 (lowest set
   bit is bit 5) and 35 (odd, so no trailing zeros).  Aborts on the first
   mismatch.  */
int main()
{
  if (foo(0) != PREC)
    __builtin_abort ();
  /* 1UL rather than 1L: shifting a signed 1 into the sign bit is undefined
     in ISO C.  */
  if (foo(1UL << (PREC - 1)) != PREC - 1)
    __builtin_abort ();
  if (foo(96) != 5)
    __builtin_abort ();
  if (foo(35) != 0)
    __builtin_abort ();
  return 0;
}

/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } } */
Loading

0 comments on commit 4798080

Please sign in to comment.