Skip to content

Commit 49a714e

Browse files
author
Eric Botcazou
committed
Fix wrong result for 1.0/3.0 at -O2 -fno-omit-frame-pointer -frounding-math
This wrong-code PR for the C++ compiler on x86-64/Windows is a regression in GCC 9 and later, but the underlying issue has probably been there since SEH was implemented and is exposed by this comment in config/i386/winnt.c: /* SEH records saves relative to the "current" stack pointer, whether or not there's a frame pointer in place. This tracks the current stack pointer offset from the CFA. */ HOST_WIDE_INT sp_offset; That's not what the (current) Microsoft documentation says; instead it says: /* SEH records offsets relative to the lowest address of the fixed stack allocation. If there is no frame pointer, these offsets are from the stack pointer; if there is a frame pointer, these offsets are from the value of the stack pointer when the frame pointer was established, i.e. the frame pointer minus the offset in the .seh_setframe directive. */ That's why the implementation is correct only under the condition that the frame pointer be established *after* the fixed stack allocation; as a matter of fact, that's clearly the model underpinning SEH, but is the opposite of what is done e.g. on Linux. However the issue is mostly papered over in practice because: 1. SEH forces use_fast_prologue_epilogue to false, which in turns forces save_regs_using_mov to false, so the general regs are always pushed when they need to be saved, which eliminates the offset computation for them. 2. As soon as a frame is larger than 240 bytes, the frame pointer is fixed arbitrarily to 128 bytes above the stack pointer, which of course requires that it be established after the fixed stack allocation. So you need a small frame clobbering one of the call-saved XMM registers in order to generate wrong SEH unwind info. The attached fix makes sure that the frame pointer is always established after the fixed stack allocation by pointing it at or below the lowest used register save area, i.e. the SSE save area, and removing the special early saves in the prologue; the end result is a uniform prologue sequence for SEH whatever the frame size. And it avoids a discrepancy between cases where the number of saved general regs is even and cases where it is odd. gcc/ PR target/99234 * config/i386/i386.c (ix86_compute_frame_layout): For a SEH target, point the hard frame pointer to the SSE register save area instead of the general register save area. Perform only minimal adjustment for small frames if it is initially not correctly aligned. (ix86_expand_prologue): Remove early saves for a SEH target. * config/i386/winnt.c (struct seh_frame_state): Document constraint. gcc/testsuite/ * g++.dg/eh/seh-xmm-unwind.C: New test.
1 parent 356cd95 commit 49a714e

File tree

3 files changed

+88
-25
lines changed

3 files changed

+88
-25
lines changed

gcc/config/i386/i386.c

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6190,11 +6190,6 @@ ix86_compute_frame_layout (void)
61906190
offset += frame->nregs * UNITS_PER_WORD;
61916191
frame->reg_save_offset = offset;
61926192

6193-
/* On SEH target, registers are pushed just before the frame pointer
6194-
location. */
6195-
if (TARGET_SEH)
6196-
frame->hard_frame_pointer_offset = offset;
6197-
61986193
/* Calculate the size of the va-arg area (not including padding, if any). */
61996194
frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
62006195

@@ -6357,14 +6352,21 @@ ix86_compute_frame_layout (void)
63576352
the unwind data structure. */
63586353
if (TARGET_SEH)
63596354
{
6360-
HOST_WIDE_INT diff;
6355+
/* Force the frame pointer to point at or below the lowest register save
6356+
area, see the SEH code in config/i386/winnt.c for the rationale. */
6357+
frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
63616358

6362-
/* If we can leave the frame pointer where it is, do so. Also, returns
6359+
/* If we can leave the frame pointer where it is, do so. Also, return
63636360
the establisher frame for __builtin_frame_address (0). */
6364-
diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
6365-
if (diff <= SEH_MAX_FRAME_SIZE
6366-
&& (diff > 240 || (diff & 15) != 0)
6367-
&& !crtl->accesses_prior_frames)
6361+
const HOST_WIDE_INT diff
6362+
= frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
6363+
if (diff <= 255)
6364+
{
6365+
/* The resulting diff will be a multiple of 16 lower than 255,
6366+
i.e. at most 240 as required by the unwind data structure. */
6367+
frame->hard_frame_pointer_offset += (diff & 15);
6368+
}
6369+
else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
63686370
{
63696371
/* Ideally we'd determine what portion of the local stack frame
63706372
(within the constraint of the lowest 240) is most heavily used.
@@ -8170,17 +8172,6 @@ ix86_expand_prologue (void)
81708172
insn = emit_insn (gen_push (hard_frame_pointer_rtx));
81718173
RTX_FRAME_RELATED_P (insn) = 1;
81728174

8173-
/* Push registers now, before setting the frame pointer
8174-
on SEH target. */
8175-
if (!int_registers_saved
8176-
&& TARGET_SEH
8177-
&& !frame.save_regs_using_mov)
8178-
{
8179-
ix86_emit_save_regs ();
8180-
int_registers_saved = true;
8181-
gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
8182-
}
8183-
81848175
if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
81858176
{
81868177
insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);

gcc/config/i386/winnt.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -830,9 +830,20 @@ i386_pe_asm_lto_end (void)
830830

831831
struct seh_frame_state
832832
{
833-
/* SEH records saves relative to the "current" stack pointer, whether
834-
or not there's a frame pointer in place. This tracks the current
835-
stack pointer offset from the CFA. */
833+
/* SEH records offsets relative to the lowest address of the fixed stack
834+
allocation. If there is no frame pointer, these offsets are from the
835+
stack pointer; if there is a frame pointer, these offsets are from the
836+
value of the stack pointer when the frame pointer was established, i.e.
837+
the frame pointer minus the offset in the .seh_setframe directive.
838+
839+
We do not distinguish these two cases, i.e. we consider that the offsets
840+
are always relative to the "current" stack pointer. This means that we
841+
need to perform the fixed stack allocation before establishing the frame
842+
pointer whenever there are registers to be saved, and this is guaranteed
843+
by the prologue provided that we force the frame pointer to point at or
844+
below the lowest used register save area, see ix86_compute_frame_layout.
845+
846+
This tracks the current stack pointer offset from the CFA. */
836847
HOST_WIDE_INT sp_offset;
837848

838849
/* The CFA is located at CFA_REG + CFA_OFFSET. */
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/* PR target/99234 */
2+
/* Test SEH unwinding of XMM register saves. */
3+
4+
/* { dg-do run { target { x86_64-*-mingw32 && lp64 } } } */
5+
6+
extern "C" void abort (void);
7+
extern "C" void exit (int);
8+
9+
void
10+
foo (void)
11+
{
12+
register __int128 xmm6 asm("xmm6") = 0;
13+
register __int128 xmm7 asm("xmm7") = 0;
14+
register __int128 xmm8 asm("xmm8") = 0;
15+
register __int128 xmm9 asm("xmm9") = 0;
16+
register __int128 xmm10 asm("xmm10") = 0;
17+
register __int128 xmm11 asm("xmm11") = 0;
18+
register __int128 xmm12 asm("xmm12") = 0;
19+
register __int128 xmm13 asm("xmm13") = 0;
20+
register __int128 xmm14 asm("xmm14") = 0;
21+
register __int128 xmm15 asm("xmm15") = 0;
22+
23+
__asm__ __volatile__ ("" : "+x" (xmm6), "+x" (xmm7), "+x" (xmm8), "+x" (xmm9),
24+
"+x" (xmm10), "+x" (xmm11), "+x" (xmm12), "+x" (xmm13),
25+
"+x" (xmm14), "+x" (xmm15));
26+
27+
throw 1;
28+
}
29+
30+
int
31+
main (void)
32+
{
33+
register __int128 xmm6 asm("xmm6") = 6;
34+
register __int128 xmm7 asm("xmm7") = 7;
35+
register __int128 xmm8 asm("xmm8") = 8;
36+
register __int128 xmm9 asm("xmm9") = 9;
37+
register __int128 xmm10 asm("xmm10") = 10;
38+
register __int128 xmm11 asm("xmm11") = 11;
39+
register __int128 xmm12 asm("xmm12") = 12;
40+
register __int128 xmm13 asm("xmm13") = 13;
41+
register __int128 xmm14 asm("xmm14") = 14;
42+
register __int128 xmm15 asm("xmm15") = 15;
43+
44+
__asm__ __volatile__ ("" : "+x" (xmm6), "+x" (xmm7), "+x" (xmm8), "+x" (xmm9),
45+
"+x" (xmm10), "+x" (xmm11), "+x" (xmm12), "+x" (xmm13),
46+
"+x" (xmm14), "+x" (xmm15));
47+
48+
try {
49+
foo ();
50+
} catch (...) {
51+
__asm__ __volatile__ ("" : "+x" (xmm6), "+x" (xmm7), "+x" (xmm8), "+x" (xmm9),
52+
"+x" (xmm10), "+x" (xmm11), "+x" (xmm12), "+x" (xmm13),
53+
"+x" (xmm14), "+x" (xmm15));
54+
55+
if (xmm6 != 6 || xmm7 != 7 || xmm8 != 8 || xmm9 != 9 || xmm10 != 10
56+
|| xmm11 != 11 || xmm12 != 12 || xmm13 != 13 || xmm14 != 14 || xmm15 != 15)
57+
abort ();
58+
}
59+
60+
exit (0);
61+
}

0 commit comments

Comments
 (0)