Merge pull request #6258 from WalterBright/unaligned-xmm-store
support some unaligned XMM stores
WalterBright committed Nov 21, 2016
2 parents 2fa54ac + 21fec29 commit 1e69da3
Showing 4 changed files with 77 additions and 20 deletions.
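Background, for readers of this page rather than part of the commit: SSE's aligned 16-byte moves (MOVAPS, MOVAPD, MOVDQA) raise a general-protection fault when the effective address is not 16-byte aligned, while the unaligned forms (MOVUPS, MOVUPD, MOVDQU) accept any address at a modest cost. Before this change the backend always emitted the aligned forms for vector loads and stores; the commit makes it fall back to the unaligned opcodes when it cannot prove alignment. A minimal C sketch of that choice using SSE intrinsics (illustrative only, not backend code):

```c
#include <xmmintrin.h>   // SSE intrinsics
#include <stdbool.h>

// Store four floats through p, choosing the opcode the way the backend
// now does: the aligned form only when alignment is known, otherwise the
// unaligned form, which works for any address.
void store_float4(float *p, __m128 v, bool knownAligned)
{
    if (knownAligned)
        _mm_store_ps(p, v);     // emits MOVAPS; faults if p is misaligned
    else
        _mm_storeu_ps(p, v);    // emits MOVUPS; safe for any p
}
```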
56 changes: 41 additions & 15 deletions src/backend/cgxmm.c
@@ -75,7 +75,7 @@ code *movxmmconst(unsigned xreg, unsigned sz, targ_size_t value, regm_t flags)
         c = movregconst(c,r,p[1],0);
         c = genfltreg(c,0x89,r,4);              // MOV floatreg+4,r
 
-        unsigned op = xmmload(TYdouble);
+        unsigned op = xmmload(TYdouble, true);
         c = genfltreg(c,op,xreg - XMM0,0);      // MOVSD XMMreg,floatreg
     }
     else
@@ -209,7 +209,8 @@ code *xmmeq(elem *e, unsigned op, elem *e1, elem *e2,regm_t *pretregs)
     if (!(retregs & XMMREGS))
         retregs = XMMREGS;              // pick any XMM reg
 
-    cs.Iop = (op == OPeq) ? xmmstore(tyml) : op;
+    bool aligned = xmmIsAligned(e1);
+    cs.Iop = (op == OPeq) ? xmmstore(tyml, aligned) : op;
     regvar = FALSE;
     varregm = 0;
     if (config.flags4 & CFG4optimized)
@@ -478,7 +479,7 @@ code *xmmopass(elem *e,regm_t *pretregs)
         if (!retregs)
             retregs = XMMREGS & ~rretregs;
         cg = allocreg(&retregs,&reg,ty1);
-        cs.Iop = xmmload(ty1);          // MOVSD xmm,xmm_m64
+        cs.Iop = xmmload(ty1, true);    // MOVSD xmm,xmm_m64
         code_newreg(&cs,reg - XMM0);
         cg = gen(cg,&cs);
     }
@@ -488,7 +489,7 @@ code *xmmopass(elem *e,regm_t *pretregs)
 
     if (!regvar)
     {
-        cs.Iop = xmmstore(ty1);         // reverse operand order of MOVS[SD]
+        cs.Iop = xmmstore(ty1,true);    // reverse operand order of MOVS[SD]
         gen(co,&cs);
     }
 
@@ -548,7 +549,7 @@ code *xmmpost(elem *e,regm_t *pretregs)
            retregs = XMMREGS;
        c = allocreg(&retregs,&reg,ty1);
        cdb.append(c);
-       cs.Iop = xmmload(ty1);          // MOVSD xmm,xmm_m64
+       cs.Iop = xmmload(ty1, true);    // MOVSD xmm,xmm_m64
        code_newreg(&cs,reg - XMM0);
        cdb.gen(&cs);
    }
@@ -561,7 +562,7 @@ code *xmmpost(elem *e,regm_t *pretregs)
    code *c = allocreg(&resultregs, &resultreg, ty1);
    cdb.append(c);
 
-   cdb.gen2(xmmload(ty1),modregxrmx(3,resultreg-XMM0,reg-XMM0));      // MOVSS/D resultreg,reg
+   cdb.gen2(xmmload(ty1,true),modregxrmx(3,resultreg-XMM0,reg-XMM0)); // MOVSS/D resultreg,reg
 
    regm_t rretregs = XMMREGS & ~(*pretregs | retregs | resultregs);
    if (!rretregs)
@@ -575,7 +576,7 @@ code *xmmpost(elem *e,regm_t *pretregs)
 
    if (!regvar)
    {
-       cs.Iop = xmmstore(ty1);         // reverse operand order of MOVS[SD]
+       cs.Iop = xmmstore(ty1,true);    // reverse operand order of MOVS[SD]
        cdb.gen(&cs);
    }
 
@@ -636,9 +637,12 @@ code *xmmneg(elem *e,regm_t *pretregs)
  * Get correct load operator based on type.
  * It is important to use the right one even if the number of bits moved is the same,
  * as there are performance consequences for using the wrong one.
+ * Params:
+ *      tym = type of data to load
+ *      aligned = for vectors, true if aligned to 16 bytes
  */
 
-unsigned xmmload(tym_t tym)
+unsigned xmmload(tym_t tym, bool aligned)
 {   unsigned op;
     switch (tybasic(tym))
     {
@@ -655,16 +659,16 @@ unsigned xmmload(tym_t tym)
         case TYcdouble:
         case TYidouble: op = LODSD;  break;       // MOVSD
 
-        case TYfloat4:  op = LODAPS; break;       // MOVAPS
-        case TYdouble2: op = LODAPD; break;       // MOVAPD
+        case TYfloat4:  op = aligned ? LODAPS : LODUPS; break; // MOVAPS / MOVUPS
+        case TYdouble2: op = aligned ? LODAPD : LODUPD; break; // MOVAPD / MOVUPD
         case TYschar16:
         case TYuchar16:
         case TYshort8:
         case TYushort8:
         case TYlong4:
         case TYulong4:
         case TYllong2:
-        case TYullong2: op = LODDQA; break;       // MOVDQA
+        case TYullong2: op = aligned ? LODDQA : LODDQU; break; // MOVDQA / MOVDQU
 
         default:
             printf("tym = x%x\n", tym);
@@ -677,7 +681,7 @@ unsigned xmmload(tym_t tym)
  * Get correct store operator based on type.
  */
 
-unsigned xmmstore(tym_t tym)
+unsigned xmmstore(tym_t tym, bool aligned)
 {   unsigned op;
     switch (tybasic(tym))
     {
@@ -694,16 +698,16 @@ unsigned xmmstore(tym_t tym)
         case TYcdouble:
         case TYcfloat:  op = STOSD;  break;       // MOVSD
 
-        case TYfloat4:  op = STOAPS; break;       // MOVAPS
-        case TYdouble2: op = STOAPD; break;       // MOVAPD
+        case TYfloat4:  op = aligned ? STOAPS : STOUPS; break; // MOVAPS / MOVUPS
+        case TYdouble2: op = aligned ? STOAPD : STOUPD; break; // MOVAPD / MOVUPD
         case TYschar16:
         case TYuchar16:
         case TYshort8:
         case TYushort8:
         case TYlong4:
         case TYulong4:
         case TYllong2:
-        case TYullong2: op = STODQA; break;       // MOVDQA
+        case TYullong2: op = aligned ? STODQA : STODQU; break; // MOVDQA / MOVDQU
 
         default:
             printf("tym = x%x\n", tym);
@@ -712,6 +716,7 @@ unsigned xmmstore(tym_t tym)
     return op;
 }
 
+
 /************************************
  * Get correct XMM operator based on type and operator.
  */
@@ -1302,4 +1307,25 @@ code *cdvecfill(elem *e, regm_t *pretregs)
     return cdb.finish();
 }
 
+/*******************************************
+ * Determine if lvalue e is a vector aligned on a 16 byte boundary.
+ * Assume it to be aligned unless can prove it is not.
+ * Params:
+ *      e = lvalue
+ * Returns:
+ *      false if definitely not aligned
+ */
+
+bool xmmIsAligned(elem *e)
+{
+    if (tyvector(e->Ety) && e->Eoper == OPvar)
+    {
+        Symbol *s = e->EV.sp.Vsym;
+        if (s->Salignsize() < 16 ||
+            e->EV.sp.Voffset & (16 - 1))
+            return false;       // definitely not aligned
+    }
+    return true;        // assume aligned
+}
+
 #endif // !SPP
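Note that xmmIsAligned() is optimistic: it assumes alignment and only returns false when the symbol's own alignment is below 16 bytes or the member offset is not a multiple of 16, since those are exactly the cases (such as the align (1) struct in the new test below) where the front end records the misalignment. A standalone restatement of the check, with hypothetical names for illustration:

```c
#include <stdbool.h>

// Hypothetical mirror of the test inside xmmIsAligned(); the function and
// parameter names are assumptions, not the backend's types.
static bool definitelyMisaligned(unsigned symbolAlignSize, unsigned long offset)
{
    return symbolAlignSize < 16        // symbol allocated with < 16-byte alignment
        || (offset & (16 - 1)) != 0;   // member offset breaks 16-byte alignment
}
```

In the new test case, the vector field lives inside an align (1) struct, so this check fires and the backend emits MOVUPS instead of the faulting MOVAPS.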
6 changes: 3 additions & 3 deletions src/backend/cod1.c
@@ -4746,8 +4746,8 @@ code *loaddata(elem *e,regm_t *pretregs)
         reg = e->EV.sp.Voffset ? e->EV.sp.Vsym->Spreg2 : e->EV.sp.Vsym->Spreg;
         forregs = mask[reg];
 #ifdef DEBUG
-        if (debugr)
-            printf("%s is fastpar and using register %s\n", e->EV.sp.Vsym->Sident, regm_str(forregs));
+//      if (debugr)
+//          printf("%s is fastpar and using register %s\n", e->EV.sp.Vsym->Sident, regm_str(forregs));
 #endif
         mfuncreg &= ~forregs;
         regcon.used |= forregs;
@@ -4802,7 +4802,7 @@ code *loaddata(elem *e,regm_t *pretregs)
         // Can't load from registers directly to XMM regs
         //e->EV.sp.Vsym->Sflags &= ~GTregcand;
 
-        op = xmmload(tym);
+        op = xmmload(tym, xmmIsAligned(e));
         if (e->Eoper == OPvar)
         {   symbol *s = e->EV.sp.Vsym;
             if (s->Sfl == FLreg && !(mask[s->Sreglsw] & XMMREGS))
5 changes: 3 additions & 2 deletions src/backend/code.h
@@ -467,11 +467,12 @@ code *xmmcnvt(elem *e,regm_t *pretregs);
 code *xmmopass(elem *e, regm_t *pretregs);
 code *xmmpost(elem *e, regm_t *pretregs);
 code *xmmneg(elem *e, regm_t *pretregs);
-unsigned xmmload(tym_t tym);
-unsigned xmmstore(tym_t tym);
+unsigned xmmload(tym_t tym, bool aligned = true);
+unsigned xmmstore(tym_t tym, bool aligned = true);
 code *cdvector(elem *e, regm_t *pretregs);
 code *cdvecsto(elem *e, regm_t *pretregs);
 code *cdvecfill(elem *e, regm_t *pretregs);
+bool xmmIsAligned(elem *e);
 
 /* cg87.c */
 void note87(elem *e, unsigned offset, int i);
30 changes: 30 additions & 0 deletions test/runnable/testxmm.d
@@ -1680,6 +1680,35 @@ void test16703()
 
 /*****************************************/
 
+struct Sunsto
+{
+  align (1): // make sure f4 is misaligned
+    byte b;
+    union
+    {
+        float4 f4;
+        ubyte[16] a;
+    }
+}
+
+ubyte[16] foounsto()
+{
+    float4 vf = 6;
+    Sunsto s;
+    s.f4 = vf * 2;
+    vf = s.f4;
+
+    return s.a;
+}
+
+void testOPvecunsto()
+{
+    auto a = foounsto();
+    assert(a == [0, 0, 64, 65, 0, 0, 64, 65, 0, 0, 64, 65, 0, 0, 64, 65]);
+}
+
+/*****************************************/
+
 int main()
 {
     test1();
@@ -1714,6 +1743,7 @@ int main()
     testprefetch();
     test16448();
     test16703();
+    testOPvecunsto();
 
     return 0;
 }
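About the expected bytes in testOPvecunsto(): each float4 lane holds 6 * 2 = 12.0f, whose IEEE-754 single-precision encoding is 0x41400000, i.e. the little-endian byte sequence [0, 0, 64, 65], repeated four times across the 16 bytes. A quick standalone check of that encoding (not part of the commit):

```c
#include <stdio.h>
#include <string.h>

int main(void)
{
    float lane = 6.0f * 2;        // the value stored in each float4 lane
    unsigned char b[4];
    memcpy(b, &lane, sizeof(b));  // reinterpret 12.0f (0x41400000) as bytes
    printf("%u %u %u %u\n", b[0], b[1], b[2], b[3]);  // prints: 0 0 64 65
    return 0;
}
```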
