Skip to content

Commit

Permalink
Merge pull request #5064 from WalterBright/frame
Browse files Browse the repository at this point in the history
replace PUSH with SUB/MOV for stack frame setup
  • Loading branch information
andralex committed Sep 14, 2015
2 parents 23e4b48 + 88d89cf commit f39fd49
Show file tree
Hide file tree
Showing 3 changed files with 199 additions and 55 deletions.
31 changes: 26 additions & 5 deletions src/backend/cgcod.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ targ_size_t spoff;
targ_size_t Foff; // BP offset of floating register
targ_size_t CSoff; // offset of common sub expressions
targ_size_t NDPoff; // offset of saved 8087 registers
// Offset of the stack frame area used for saved registers. prolog() starts it
// at NDPoff and, when the MOV strategy is chosen, lowers it by the space the
// saved registers need.
targ_size_t pushoff; // offset of saved registers
// true when prolog() reserved the area at pushoff, so registers are saved and
// restored with MOV/MOVUPD into that area instead of being pushed and popped
bool pushoffuse; // using pushoff
int BPoff; // offset from BP
int EBPtoESP; // add to EBP offset to get ESP offset
int AllocaOff; // offset of alloca temporary
Expand Down Expand Up @@ -772,18 +774,37 @@ code *prolog()
NDPoff = CSoff;
#endif

//printf("Fast.size = x%x, Auto.size = x%x\n", (int)Fast.size, (int)Auto.size);
regm_t topush = fregsaved & ~mfuncreg; // mask of registers that need saving
pushoffuse = false;
pushoff = NDPoff;
if (config.flags4 & CFG4speed && (I32 || I64))
{
/* Instead of pushing the registers onto the stack one by one,
* allocate space in the stack frame and copy/restore them there.
*/
int xmmtopush = numbitsset(topush & XMMREGS); // XMM regs take 16 bytes
int gptopush = numbitsset(topush) - xmmtopush; // general purpose registers to save
if (NDPoff || xmmtopush)
{
pushoff = alignsection(pushoff - (gptopush * REGSIZE + xmmtopush * 16),
xmmtopush ? STACKALIGN : REGSIZE, bias);
pushoffuse = true; // tell others we're using this strategy
}
}

localsize = -NDPoff;
//printf("Fast.size = x%x, Auto.size = x%x\n", (int)Fast.size, (int)Auto.size);

regm_t topush = fregsaved & ~mfuncreg; // mask of registers that need saving
int npush = numbitsset(topush); // number of registers that need saving
npush += numbitsset(topush & XMMREGS); // XMM regs take 16 bytes, so count them twice
localsize = -pushoff;

// Keep the stack aligned by 8 for any subsequent function calls
if (!I16 && calledafunc &&
(STACKALIGN == 16 || config.flags4 & CFG4stackalign))
{
int npush = numbitsset(topush); // number of registers that need saving
npush += numbitsset(topush & XMMREGS); // XMM regs take 16 bytes, so count them twice
if (pushoffuse)
npush = 0;

//printf("npush = %d Para.size = x%x needframe = %d localsize = x%x\n",
// npush, Para.size, needframe, localsize);

Expand Down
220 changes: 170 additions & 50 deletions src/backend/cod3.c
Original file line number Diff line number Diff line change
Expand Up @@ -3112,34 +3112,183 @@ code* prolog_setupalloca()
return c;
}

/**************************************
 * Save registers that the function destroys,
 * but that the ABI says should be preserved across
 * function calls.
 *
 * When pushoffuse is set, each register is stored with MOV/MOVUPD into the
 * area of the stack frame that starts at pushoff: XMM registers first
 * (16 bytes each), followed by the general purpose registers (REGSIZE bytes
 * each). The stack pointer is not adjusted on this path.
 * Otherwise each register is pushed (XMM registers via SUB RSP,16 + MOVUPD)
 * and EBPtoESP and spoff are advanced to track the stack pointer.
 *
 * Params:
 *      c = code to which the save instructions are added
 *      topush = mask of registers to save
 * Returns:
 *      the resulting code
 */

code* prolog_saveregs(code *c, regm_t topush)
{
    if (pushoffuse)
    {
        // Save to preallocated section in the stack frame
        int xmmtopush = numbitsset(topush & XMMREGS);   // XMM regs take 16 bytes
        targ_size_t xmmoffset = pushoff + BPoff;
        if (!hasframe)
            xmmoffset += EBPtoESP;      // no frame: slots are addressed off ESP
        targ_size_t gpoffset = xmmoffset + xmmtopush * 16;  // GP slots follow the XMM slots
        while (topush)
        {
            unsigned reg = findreg(topush);
            topush &= ~mask[reg];
            if (reg >= XMM0)
            {
                if (hasframe)
                {
                    // MOVUPD xmmoffset[EBP],xmm
                    c = genc1(c,STOUPD,modregxrm(2,reg-XMM0,BPRM),FLconst,xmmoffset);
                }
                else
                {
                    // MOVUPD xmmoffset[ESP],xmm
                    c = genc1(c,STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,xmmoffset);
                }
                xmmoffset += 16;
            }
            else
            {
                if (hasframe)
                {
                    // MOV gpoffset[EBP],reg
                    c = genc1(c,0x89,modregxrm(2,reg,BPRM),FLconst,gpoffset);
                }
                else
                {
                    // MOV gpoffset[ESP],reg
                    c = genc1(c,0x89,modregxrm(2,reg,4) + 256*modregrm(0,4,SP),FLconst,gpoffset);
                }
                if (I64)
                    code_orrex(c, REX_W);       // 64 bit operand size
                if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D)
                {   // Emit debug_frame data giving location of saved register
                    // NOTE(review): the PUSH path below records an offset relative
                    // to 0[EBP]; gpoffset is ESP relative when !hasframe - confirm
                    // that is what the debug_frame data expects.
                    pinholeopt(c, NULL);
                    dwarf_CFA_set_loc(calcblksize(c));  // address after save
                    dwarf_CFA_offset(reg, gpoffset);
                }
                gpoffset += REGSIZE;
            }
        }
    }
    else
    {
        while (topush)                      /* while registers to push      */
        {
            unsigned reg = findreg(topush);
            topush &= ~mask[reg];
            if (reg >= XMM0)
            {
                // SUB RSP,16
                c = cod3_stackadj(c, 16);
                // MOVUPD 0[RSP],xmm
                c = genc1(c,STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0);
                EBPtoESP += 16;
                spoff += 16;
            }
            else
            {
                c = genpush(c, reg);
                EBPtoESP += REGSIZE;
                spoff += REGSIZE;
                if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D)
                {   // Emit debug_frame data giving location of saved register
                    // relative to 0[EBP]
                    pinholeopt(c, NULL);
                    dwarf_CFA_set_loc(calcblksize(c));  // address after PUSH reg
                    dwarf_CFA_offset(reg, -EBPtoESP - REGSIZE);
                }
            }
        }
    }
    return c;
}

/**************************************
 * Undo prolog_saveregs()
 *
 * When pushoffuse is set, each register is reloaded with MOV/MOVUPD from the
 * area of the stack frame that starts at pushoff, using the same layout as
 * prolog_saveregs(): XMM registers first (16 bytes each), followed by the
 * general purpose registers (REGSIZE bytes each).
 * Otherwise the registers are popped, scanning from the highest register
 * number downward (XMM registers via MOVUPD + ADD RSP,16).
 *
 * Params:
 *      c = code to which the restore instructions are added
 *      topop = mask of registers to restore; must contain only XMM registers
 *              and registers in the low 16 bits of the mask (asserted)
 * Returns:
 *      the resulting code
 */

code* epilog_restoreregs(code *c, regm_t topop)
{
#ifdef DEBUG
    if (topop & ~(XMMREGS | 0xFFFF))
        printf("fregsaved = %s, mfuncreg = %s\n",regm_str(fregsaved),regm_str(mfuncreg));
#endif
    assert(!(topop & ~(XMMREGS | 0xFFFF)));
    if (pushoffuse)
    {
        // Restore from preallocated section in the stack frame
        int xmmtopop = numbitsset(topop & XMMREGS);   // XMM regs take 16 bytes
        targ_size_t xmmoffset = pushoff + BPoff;
        if (!hasframe)
            xmmoffset += EBPtoESP;      // no frame: slots are addressed off ESP
        targ_size_t gpoffset = xmmoffset + xmmtopop * 16;   // GP slots follow the XMM slots
        while (topop)
        {
            unsigned reg = findreg(topop);
            topop &= ~mask[reg];
            if (reg >= XMM0)
            {
                if (hasframe)
                {
                    // MOVUPD xmm,xmmoffset[EBP]
                    c = genc1(c,LODUPD,modregxrm(2,reg-XMM0,BPRM),FLconst,xmmoffset);
                }
                else
                {
                    // MOVUPD xmm,xmmoffset[ESP]
                    c = genc1(c,LODUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,xmmoffset);
                }
                xmmoffset += 16;
            }
            else
            {
                if (hasframe)
                {
                    // MOV reg,gpoffset[EBP]
                    c = genc1(c,0x8B,modregxrm(2,reg,BPRM),FLconst,gpoffset);
                }
                else
                {
                    // MOV reg,gpoffset[ESP]
                    c = genc1(c,0x8B,modregxrm(2,reg,4) + 256*modregrm(0,4,SP),FLconst,gpoffset);
                }
                if (I64)
                    code_orrex(c, REX_W);       // 64 bit operand size
                gpoffset += REGSIZE;
            }
        }
    }
    else
    {
        // Pop in the reverse of the order prolog_saveregs() pushed them:
        // start at the highest candidate register and walk downward.
        // NOTE(review): if topop ever holds a bit above the starting register
        // this loop would not terminate - presumably callers never pass one.
        unsigned reg = I64 ? XMM7 : DI;
        if (!(topop & XMMREGS))
            reg = R15;
        regm_t regm = 1 << reg;

        while (topop)
        {   if (topop & regm)
            {
                if (reg >= XMM0)
                {
                    // MOVUPD xmm,0[RSP]
                    c = genc1(c,LODUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0);
                    // ADD RSP,16
                    c = cod3_stackadj(c, -16);
                }
                else
                {
                    c = gen1(c,0x58 + (reg & 7));         // POP reg
                    if (reg & 8)
                        code_orrex(c, REX_B);
                }
                topop &= ~regm;
            }
            regm >>= 1;
            reg--;
        }
    }
    return c;
}

Expand Down Expand Up @@ -3684,36 +3833,7 @@ void epilog(block *b)
* order they were pushed.
*/
topop = fregsaved & ~mfuncreg;
#ifdef DEBUG
if (topop & ~(XMMREGS | 0xFFFF))
printf("fregsaved = %s, mfuncreg = %s\n",regm_str(fregsaved),regm_str(mfuncreg));
#endif
assert(!(topop & ~(XMMREGS | 0xFFFF)));
reg = I64 ? XMM7 : DI;
if (!(topop & XMMREGS))
reg = R15;
regm = 1 << reg;
while (topop)
{ if (topop & regm)
{
if (reg >= XMM0)
{
// MOVUPD xmm,0[RSP]
c = genc1(c,LODUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0);
// ADD RSP,16
c = cod3_stackadj(c, -16);
}
else
{
c = gen1(c,0x58 + (reg & 7)); // POP reg
if (reg & 8)
code_orrex(c, REX_B);
}
topop &= ~regm;
}
regm >>= 1;
reg--;
}
c = epilog_restoreregs(c, topop);

#if MARS
if (usednteh & NTEHjmonitor)
Expand Down
3 changes: 3 additions & 0 deletions src/backend/code.h
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,8 @@ extern targ_size_t spoff;
extern targ_size_t Foff; // BP offset of floating register
extern targ_size_t CSoff; // offset of common sub expressions
extern targ_size_t NDPoff; // offset of saved 8087 registers
// Offset of the stack frame area used for saved registers; read by
// prolog_saveregs() and epilog_restoreregs() when pushoffuse is set.
extern targ_size_t pushoff; // offset of saved registers
// true when registers are saved/restored with MOV/MOVUPD into the area at
// pushoff instead of being pushed and popped
extern bool pushoffuse; // using pushoff
extern int BPoff; // offset from BP
extern int EBPtoESP; // add to EBP offset to get ESP offset
extern int AllocaOff; // offset of alloca temporary
Expand All @@ -405,6 +407,7 @@ code* prolog_frameadj(tym_t tyf, unsigned xlocalsize, bool enter, bool* pushallo
code* prolog_frameadj2(tym_t tyf, unsigned xlocalsize, bool* pushalloc);
code* prolog_setupalloca();
// Add code to c that saves the registers in mask topush; returns the resulting code.
code* prolog_saveregs(code *c, regm_t topush);
// Undo prolog_saveregs(): add code to c that restores the registers in mask topop.
code* epilog_restoreregs(code *c, regm_t topop);
code* prolog_trace(bool farfunc, unsigned* regsaved);
code* prolog_gen_win64_varargs();
code* prolog_genvarargs(symbol* sv, regm_t* namedargs);
Expand Down

0 comments on commit f39fd49

Please sign in to comment.