Skip to content

Commit

Permalink
i386: Fix up df uses in i386 splitters [PR99104]
Browse files Browse the repository at this point in the history
The following testcase started ICEing with my recent changes to enable
split4 after sel-sched, but it seems the bug is more general.
Some of the i386 splitter condition functions use and rely on df, but
the split passes don't really df_analyze/df_finish_pass, so the DF info
may be stale or not computed at all - the particular ICE is because
there is a new bb and df_get_live_out (bb) returns NULL on it as the
live or lr problem has not been computed yet.

This patch fixes it by not calling ix86_ok_to_clobber_flags from
ix86_avoid_lea_for_add where it wasn't ever needed because the splitters
using that function as condition have (clobber FLAGS) in their pattern.
And, changes the ix86_avoid_lea_for_addr using splitter from normal splitter
to peephole2 splitter that uses peep2_regno_dead_p infrastructure to
determine if FLAGS is dead.  Also, it saves and restores recog_data
variable around the call to distance_non_agu_define and doesn't call
extract_insn_data there, because split_insns or peephole2_insns just
clear recog_data.insn and then fill in recog_data.operand array, and so
might not match what extract_insn will do on the insn at all.

2021-02-18  Jakub Jelinek  <jakub@redhat.com>

	PR target/99104
	* config/i386/i386.c (distance_non_agu_define): Don't call
	extract_insn_cached here.
	(ix86_lea_outperforms): Save and restore recog_data around call
	to distance_non_agu_define and distance_agu_use.
	(ix86_ok_to_clobber_flags): Remove.
	(ix86_avoid_lea_for_add): Don't call ix86_ok_to_clobber_flags.
	(ix86_avoid_lea_for_addr): Likewise.  Adjust function comment.
	* config/i386/i386.md (*lea<mode>): Change from define_insn_and_split
	into define_insn.  Move the splitting to define_peephole2 and
	check there using peep2_regno_dead_p if FLAGS_REG is dead.

	* gcc.dg/pr99104.c: New test.
  • Loading branch information
jakubjelinek committed Feb 18, 2021
1 parent acc0ee5 commit decd8fb
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 73 deletions.
59 changes: 6 additions & 53 deletions gcc/config/i386/i386.c
Expand Up @@ -14754,11 +14754,6 @@ distance_non_agu_define (unsigned int regno1, unsigned int regno2,
}
}

/* get_attr_type may modify recog data. We want to make sure
that recog data is valid for instruction INSN, on which
distance_non_agu_define is called. INSN is unchanged here. */
extract_insn_cached (insn);

if (!found)
return -1;

Expand Down Expand Up @@ -14928,17 +14923,15 @@ ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
return true;
}

rtx_insn *rinsn = recog_data.insn;
/* Remember recog_data content. */
struct recog_data_d recog_data_save = recog_data;

dist_define = distance_non_agu_define (regno1, regno2, insn);
dist_use = distance_agu_use (regno0, insn);

/* distance_non_agu_define can call extract_insn_cached. If this function
is called from define_split conditions, that can break insn splitting,
because split_insns works by clearing recog_data.insn and then modifying
recog_data.operand array and match the various split conditions. */
if (recog_data.insn != rinsn)
recog_data.insn = NULL;
/* distance_non_agu_define can call get_attr_type which can call
recog_memoized, restore recog_data back to previous content. */
recog_data = recog_data_save;

if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
{
Expand Down Expand Up @@ -14968,38 +14961,6 @@ ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
return dist_define >= dist_use;
}

/* Return true if it is legal to clobber flags by INSN and
false otherwise. */

static bool
ix86_ok_to_clobber_flags (rtx_insn *insn)
{
basic_block bb = BLOCK_FOR_INSN (insn);
df_ref use;
bitmap live;

while (insn)
{
if (NONDEBUG_INSN_P (insn))
{
FOR_EACH_INSN_USE (use, insn)
if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
return false;

if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
return true;
}

if (insn == BB_END (bb))
break;

insn = NEXT_INSN (insn);
}

live = df_get_live_out(bb);
return !REGNO_REG_SET_P (live, FLAGS_REG);
}

/* Return true if we need to split op0 = op1 + op2 into a sequence of
move and add to avoid AGU stalls. */

Expand All @@ -15012,10 +14973,6 @@ ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
return false;

/* Check it is correct to split here. */
if (!ix86_ok_to_clobber_flags(insn))
return false;

regno0 = true_regnum (operands[0]);
regno1 = true_regnum (operands[1]);
regno2 = true_regnum (operands[2]);
Expand Down Expand Up @@ -15051,7 +15008,7 @@ ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
}

/* Return true if we need to split lea into a sequence of
instructions to avoid AGU stalls. */
instructions to avoid AGU stalls during peephole2. */

bool
ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
Expand All @@ -15071,10 +15028,6 @@ ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
&& REG_P (XEXP (operands[1], 0))))
return false;

/* Check if it is OK to split here. */
if (!ix86_ok_to_clobber_flags (insn))
return false;

ok = ix86_decompose_address (operands[1], &parts);
gcc_assert (ok);

Expand Down
38 changes: 18 additions & 20 deletions gcc/config/i386/i386.md
Expand Up @@ -5176,7 +5176,7 @@

;; Load effective address instructions

(define_insn_and_split "*lea<mode>"
(define_insn "*lea<mode>"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
"ix86_hardreg_mov_ok (operands[0], operands[1])"
Expand All @@ -5189,38 +5189,36 @@
else
return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";
}
"reload_completed && ix86_avoid_lea_for_addr (insn, operands)"
[(set_attr "type" "lea")
(set (attr "mode")
(if_then_else
(match_operand 1 "SImode_address_operand")
(const_string "SI")
(const_string "<MODE>")))])

(define_peephole2
[(set (match_operand:SWI48 0 "register_operand")
(match_operand:SWI48 1 "address_no_seg_operand"))]
"ix86_hardreg_mov_ok (operands[0], operands[1])
&& peep2_regno_dead_p (0, FLAGS_REG)
&& ix86_avoid_lea_for_addr (peep2_next_insn (0), operands)"
[(const_int 0)]
{
machine_mode mode = <MODE>mode;
rtx pat;

/* ix86_avoid_lea_for_addr re-recognizes insn and may
change operands[] array behind our back. */
pat = PATTERN (curr_insn);

operands[0] = SET_DEST (pat);
operands[1] = SET_SRC (pat);

/* Emit all operations in SImode for zero-extended addresses. */
if (SImode_address_operand (operands[1], VOIDmode))
mode = SImode;

ix86_split_lea_for_addr (curr_insn, operands, mode);
ix86_split_lea_for_addr (peep2_next_insn (0), operands, mode);

/* Zero-extend return register to DImode for zero-extended addresses. */
if (mode != <MODE>mode)
emit_insn (gen_zero_extendsidi2
(operands[0], gen_lowpart (mode, operands[0])));
emit_insn (gen_zero_extendsidi2 (operands[0],
gen_lowpart (mode, operands[0])));

DONE;
}
[(set_attr "type" "lea")
(set (attr "mode")
(if_then_else
(match_operand 1 "SImode_address_operand")
(const_string "SI")
(const_string "<MODE>")))])
})

;; Add instructions

Expand Down
15 changes: 15 additions & 0 deletions gcc/testsuite/gcc.dg/pr99104.c
@@ -0,0 +1,15 @@
/* PR target/99104 */
/* { dg-do compile { target int128 } } */
/* { dg-options "-O2 -fsel-sched-pipelining -fselective-scheduling2 -funroll-loops" } */

__int128 a;
int b;
int foo (void);

int __attribute__ ((simd))
bar (void)
{
a = ~a;
if (foo ())
b = 0;
}

0 comments on commit decd8fb

Please sign in to comment.