Skip to content

Commit

Permalink
[ARC] Reimplement return padding operation for ARC700.
Browse files Browse the repository at this point in the history
For ARC700, adding padding if necessary to avoid a mispredict.  A
return could happen immediately after the function start.  A
call/return and return/return must be 6 bytes apart to avoid
mispredict.

The old implementation was doing this operation very late in the
compilation process, and the additional nop instructions and/or
forcing some other instruction to take their long form was not taken
into account when generating brcc instructions. Thus, wrong code could
be generated.

gcc/
2017-03-24  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc-protos.h (arc_pad_return): Remove.
	* config/arc/arc.c (machine_function): Remove force_short_suffix
	and size_reason.
	(arc_print_operand): Adjust printing of '&'.
	(arc_verify_short): Remove conditional printing of short suffix.
	(arc_final_prescan_insn): Remove reference to size_reason.
	(pad_return): New function.
	(arc_reorg): Call pad_return.
	(arc_pad_return): Remove.
	(arc_init_machine_status): Remove reference to force_short_suffix.
	* config/arc/arc.md (vunspec): Add VUNSPEC_ARC_BLOCKAGE.
	(attr length): When attribute iscompact is true force to 2
	regardless; in the case of maybe check if we want to force the
	instruction to have 4 bytes length.
	(nopv): Change it to generate 4 byte long nop as well.
	(blockage): New pattern.
	(simple_return): Remove call to arc_pad_return.
	(p_return_i): Likewise.

gcc/testsuite/
2017-03-24  Claudiu Zissulescu  <claziss@synopsys.com>

	* gcc.target/arc/pr9001107555.c: New file.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@261542 138bc75d-0d04-0410-961f-82ee72b054a4
  • Loading branch information
claziss committed Jun 13, 2018
1 parent 3df4cca commit 5afc07e
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 93 deletions.
21 changes: 21 additions & 0 deletions gcc/ChangeLog
@@ -1,3 +1,24 @@
2018-06-12 Claudiu Zissulescu <claziss@synopsys.com>

* config/arc/arc-protos.h (arc_pad_return): Remove.
* config/arc/arc.c (machine_function): Remove force_short_suffix
and size_reason.
(arc_print_operand): Adjust printing of '&'.
(arc_verify_short): Remove conditional printing of short suffix.
(arc_final_prescan_insn): Remove reference to size_reason.
(pad_return): New function.
(arc_reorg): Call pad_return.
(arc_pad_return): Remove.
(arc_init_machine_status): Remove reference to force_short_suffix.
* config/arc/arc.md (vunspec): Add VUNSPEC_ARC_BLOCKAGE.
(attr length): When attribute iscompact is true force to 2
regardless; in the case of maybe check if we want to force the
instruction to have 4 bytes length.
(nopv): Change it to generate 4 byte long nop as well.
(blockage): New pattern.
(simple_return): Remove call to arc_pad_return.
(p_return_i): Likewise.

2018-06-12 Claudiu Zissulescu <claziss@synopsys.com>

* config/arc/elf.h (LINK_GCC_C_SEQUENCE_SPEC): Define.
Expand Down
1 change: 0 additions & 1 deletion gcc/config/arc/arc-protos.h
Expand Up @@ -89,7 +89,6 @@ extern void arc_clear_unalign (void);
extern void arc_toggle_unalign (void);
extern void split_addsi (rtx *);
extern void split_subsi (rtx *);
extern void arc_pad_return (void);
extern void arc_split_move (rtx *);
extern const char *arc_short_long (rtx_insn *insn, const char *, const char *);
extern rtx arc_regno_use_in (unsigned int, rtx);
Expand Down
156 changes: 72 additions & 84 deletions gcc/config/arc/arc.c
Expand Up @@ -2629,8 +2629,6 @@ typedef struct GTY (()) machine_function
struct arc_frame_info frame_info;
/* To keep track of unalignment caused by short insns. */
int unalign;
int force_short_suffix; /* Used when disgorging return delay slot insns. */
const char *size_reason;
struct arc_ccfsm ccfsm_current;
/* Map from uid to ccfsm state during branch shortening. */
rtx ccfsm_current_insn;
Expand Down Expand Up @@ -4288,7 +4286,7 @@ arc_print_operand (FILE *file, rtx x, int code)
}
break;
case '&':
if (TARGET_ANNOTATE_ALIGN && cfun->machine->size_reason)
if (TARGET_ANNOTATE_ALIGN)
fprintf (file, "; unalign: %d", cfun->machine->unalign);
return;
case '+':
Expand Down Expand Up @@ -4961,18 +4959,13 @@ static int
arc_verify_short (rtx_insn *insn, int, int check_attr)
{
enum attr_iscompact iscompact;
struct machine_function *machine;

if (check_attr > 0)
{
iscompact = get_attr_iscompact (insn);
if (iscompact == ISCOMPACT_FALSE)
return 0;
}
machine = cfun->machine;

if (machine->force_short_suffix >= 0)
return machine->force_short_suffix;

return (get_attr_length (insn) & 2) != 0;
}
Expand Down Expand Up @@ -5011,8 +5004,6 @@ arc_final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
cfun->machine->prescan_initialized = 1;
}
arc_ccfsm_advance (insn, &arc_ccfsm_current);

cfun->machine->size_reason = 0;
}

/* Given FROM and TO register numbers, say whether this elimination is allowed.
Expand Down Expand Up @@ -7654,6 +7645,76 @@ jli_call_scan (void)
}
}

/* Add padding if necessary to avoid a mispredict. A return could
happen immediately after the function start. A call/return and
return/return must be 6 bytes apart to avoid mispredict. */

static void
pad_return (void)
{
rtx_insn *insn;
long offset;

if (!TARGET_PAD_RETURN)
return;

for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
rtx_insn *prev0 = prev_active_insn (insn);
bool wantlong = false;

if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) != SIMPLE_RETURN)
continue;

if (!prev0)
{
prev0 = emit_insn_before (gen_nopv (), insn);
/* REG_SAVE_NOTE is used by Haifa scheduler, we are in reorg
so it is safe to reuse it for forcing a particular length
for an instruction. */
add_reg_note (prev0, REG_SAVE_NOTE, GEN_INT (1));
emit_insn_before (gen_nopv (), insn);
continue;
}
offset = get_attr_length (prev0);

if (get_attr_length (prev0) == 2
&& get_attr_iscompact (prev0) != ISCOMPACT_TRUE)
{
/* Force long version of the insn. */
wantlong = true;
offset += 2;
}

rtx_insn *prev = prev_active_insn (prev0);
if (prev)
offset += get_attr_length (prev);

prev = prev_active_insn (prev);
if (prev)
offset += get_attr_length (prev);

switch (offset)
{
case 2:
prev = emit_insn_before (gen_nopv (), insn);
add_reg_note (prev, REG_SAVE_NOTE, GEN_INT (1));
break;
case 4:
emit_insn_before (gen_nopv (), insn);
break;
default:
continue;
}

if (wantlong)
add_reg_note (prev0, REG_SAVE_NOTE, GEN_INT (1));

/* Emit a blockage to avoid delay slot scheduling. */
emit_insn_before (gen_blockage (), insn);
}
}

static int arc_reorg_in_progress = 0;

/* ARC's machince specific reorg function. */
Expand All @@ -7679,6 +7740,7 @@ arc_reorg (void)

workaround_arc_anomaly ();
jli_call_scan ();
pad_return ();

/* FIXME: should anticipate ccfsm action, generate special patterns for
to-be-deleted branches that have no delay slot and have at least the
Expand Down Expand Up @@ -9237,79 +9299,6 @@ arc_branch_size_unknown_p (void)
return !optimize_size && arc_reorg_in_progress;
}

/* We are about to output a return insn. Add padding if necessary to avoid
a mispredict. A return could happen immediately after the function
start, but after a call we know that there will be at least a blink
restore. */

void
arc_pad_return (void)
{
rtx_insn *insn = current_output_insn;
rtx_insn *prev = prev_active_insn (insn);
int want_long;

if (!prev)
{
fputs ("\tnop_s\n", asm_out_file);
cfun->machine->unalign ^= 2;
want_long = 1;
}
/* If PREV is a sequence, we know it must be a branch / jump or a tailcall,
because after a call, we'd have to restore blink first. */
else if (GET_CODE (PATTERN (prev)) == SEQUENCE)
return;
else
{
want_long = (get_attr_length (prev) == 2);
prev = prev_active_insn (prev);
}
if (!prev
|| ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
NON_SIBCALL)
: CALL_ATTR (prev, NON_SIBCALL)))
{
if (want_long)
cfun->machine->size_reason
= "call/return and return/return must be 6 bytes apart to avoid mispredict";
else if (TARGET_UNALIGN_BRANCH && cfun->machine->unalign)
{
cfun->machine->size_reason
= "Long unaligned jump avoids non-delay slot penalty";
want_long = 1;
}
/* Disgorge delay insn, if there is any, and it may be moved. */
if (final_sequence
/* ??? Annulled would be OK if we can and do conditionalize
the delay slot insn accordingly. */
&& !INSN_ANNULLED_BRANCH_P (insn)
&& (get_attr_cond (insn) != COND_USE
|| !reg_set_p (gen_rtx_REG (CCmode, CC_REG),
XVECEXP (final_sequence, 0, 1))))
{
prev = as_a <rtx_insn *> (XVECEXP (final_sequence, 0, 1));
gcc_assert (!prev_real_insn (insn)
|| !arc_hazard (prev_real_insn (insn), prev));
cfun->machine->force_short_suffix = !want_long;
rtx save_pred = current_insn_predicate;
final_scan_insn (prev, asm_out_file, optimize, 1, NULL);
cfun->machine->force_short_suffix = -1;
prev->set_deleted ();
current_output_insn = insn;
current_insn_predicate = save_pred;
}
else if (want_long)
fputs ("\tnop\n", asm_out_file);
else
{
fputs ("\tnop_s\n", asm_out_file);
cfun->machine->unalign ^= 2;
}
}
return;
}

/* The usual; we set up our machine_function data. */

static struct machine_function *
Expand All @@ -9318,7 +9307,6 @@ arc_init_machine_status (void)
struct machine_function *machine;
machine = ggc_cleared_alloc<machine_function> ();
machine->fn_type = ARC_FUNCTION_UNKNOWN;
machine->force_short_suffix = -1;

return machine;
}
Expand Down
26 changes: 18 additions & 8 deletions gcc/config/arc/arc.md
Expand Up @@ -161,6 +161,7 @@
VUNSPEC_ARC_CAS
VUNSPEC_ARC_SC
VUNSPEC_ARC_LL
VUNSPEC_ARC_BLOCKAGE
])

(define_constants
Expand Down Expand Up @@ -384,13 +385,18 @@
;; and insn lengths: insns with shimm values cannot be conditionally executed.
(define_attr "length" ""
(cond
[(eq_attr "iscompact" "true,maybe")
[(eq_attr "iscompact" "true")
(const_int 2)

(eq_attr "iscompact" "maybe")
(cond
[(eq_attr "type" "sfunc")
(cond [(match_test "GET_CODE (PATTERN (insn)) == COND_EXEC")
(const_int 12)]
(const_int 10))
(match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") (const_int 4)]
(match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") (const_int 4)
(match_test "find_reg_note (insn, REG_SAVE_NOTE, GEN_INT (1))")
(const_int 4)]
(const_int 2))

(eq_attr "iscompact" "true_limm")
Expand Down Expand Up @@ -4431,8 +4437,16 @@
""
"nop%?"
[(set_attr "type" "misc")
(set_attr "iscompact" "true")
(set_attr "length" "2")])
(set_attr "iscompact" "maybe")
(set_attr "length" "*")])

(define_insn "blockage"
[(unspec_volatile [(const_int 0)] VUNSPEC_ARC_BLOCKAGE)]
""
""
[(set_attr "length" "0")
(set_attr "type" "block")]
)

;; Split up troublesome insns for better scheduling.

Expand Down Expand Up @@ -4977,8 +4991,6 @@
{
return \"rtie\";
}
if (TARGET_PAD_RETURN)
arc_pad_return ();
output_asm_insn (\"j%!%* [%0]%&\", &reg);
return \"\";
}
Expand Down Expand Up @@ -5022,8 +5034,6 @@
arc_return_address_register (arc_compute_function_type
(cfun)));
if (TARGET_PAD_RETURN)
arc_pad_return ();
output_asm_insn (\"j%d0%!%# [%1]%&\", xop);
/* record the condition in case there is a delay insn. */
arc_ccfsm_record_condition (xop[0], false, insn, 0);
Expand Down
4 changes: 4 additions & 0 deletions gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
2018-06-12 Claudiu Zissulescu <claziss@synopsys.com>

* gcc.target/arc/pr9001107555.c: New file.

2018-06-12 Richard Sandiford <richard.sandiford@linaro.org>

* g++.dg/torture/aarch64-vect-init-1.C: New test.
Expand Down
51 changes: 51 additions & 0 deletions gcc/testsuite/gcc.target/arc/pr9001107555.c
@@ -0,0 +1,51 @@
/* { dg-do assemble } *
/* { dg-skip-if "" { ! { clmcpu } } } */
/* { dg-options "-O3 -funroll-loops -mno-sdata -mcpu=arc700" } */

typedef long long a __attribute__((__mode__(__DI__)));
typedef struct c c;

struct b
{
int d;
c *e;
};

enum { f };

typedef struct
{
a g;
a h;
int i;
} j;

struct c
{
int count;
int current;
};

int k;

extern void bar (int, long long);
int foo (struct b *demux, __builtin_va_list args)
{
c m = *demux->e;
j *n;
switch (k)
case f:
{
a o = __builtin_va_arg(args, a);
m.current = 0;
while (m.current < m.count)
{
if (n[m.current].h > o) {
bar (demux->d, 4 + 128LL * n[m.current].i);
break;
}
m.current++;
}
return 0;
}
}

0 comments on commit 5afc07e

Please sign in to comment.