Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 1c3336e

Browse files
committed
[Arm64] SIMDIntrinsicGetItem contain mem op1
1 parent 2005790 commit 1c3336e

File tree

3 files changed

+121
-31
lines changed

3 files changed

+121
-31
lines changed

src/jit/codegenarm64.cpp

Lines changed: 85 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4578,48 +4578,112 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
45784578
// - the source of SIMD type (op1)
45794579
// - the index of the value to be returned.
45804580
genConsumeOperands(simdNode);
4581-
regNumber srcReg = op1->gtRegNum;
45824581

4583-
// TODO-ARM64-CQ Optimize SIMDIntrinsicGetItem
4584-
// Optimize the case of op1 is in memory and trying to access ith element.
4585-
assert(op1->isUsedFromReg());
4586-
4587-
emitAttr baseTypeSize = emitTypeSize(baseType);
4582+
emitAttr baseTypeSize = emitTypeSize(baseType);
4583+
unsigned baseTypeScale = genLog2(EA_SIZE_IN_BYTES(baseTypeSize));
45884584

45894585
if (op2->IsCnsIntOrI())
45904586
{
45914587
assert(op2->isContained());
45924588

45934589
ssize_t index = op2->gtIntCon.gtIconVal;
45944590

4591+
// We only need to generate code for the get if the index is valid
4592+
// If the index is invalid, the previously generated range check code will throw
45954593
if (getEmitter()->isValidVectorIndex(emitTypeSize(simdType), baseTypeSize, index))
45964594
{
4597-
// Only generate code for the get if the index is valid
4598-
// Otherwise generated code will throw
4599-
getEmitter()->emitIns_R_R_I(INS_mov, baseTypeSize, targetReg, srcReg, index);
4595+
if (op1->isContained())
4596+
{
4597+
int offset = (int)index * genTypeSize(baseType);
4598+
instruction ins = ins_Load(baseType);
4599+
baseTypeSize = varTypeIsFloating(baseType)
4600+
? baseTypeSize
4601+
: getEmitter()->emitInsAdjustLoadStoreAttr(ins, baseTypeSize);
4602+
4603+
assert(!op1->isUsedFromReg());
4604+
4605+
if (op1->OperIsLocal())
4606+
{
4607+
unsigned varNum = op1->gtLclVarCommon.gtLclNum;
4608+
4609+
getEmitter()->emitIns_R_S(ins, baseTypeSize, targetReg, varNum, offset);
4610+
}
4611+
else
4612+
{
4613+
assert(op1->OperGet() == GT_IND);
4614+
4615+
GenTree* addr = op1->AsIndir()->Addr();
4616+
assert(!addr->isContained());
4617+
regNumber baseReg = addr->gtRegNum;
4618+
4619+
// ldr targetReg, [baseReg, #offset]
4620+
getEmitter()->emitIns_R_R_I(ins, baseTypeSize, targetReg, baseReg, offset);
4621+
}
4622+
}
4623+
else
4624+
{
4625+
assert(op1->isUsedFromReg());
4626+
regNumber srcReg = op1->gtRegNum;
4627+
4628+
// mov targetReg, srcReg[#index]
4629+
getEmitter()->emitIns_R_R_I(INS_mov, baseTypeSize, targetReg, srcReg, index);
4630+
}
46004631
}
46014632
}
46024633
else
46034634
{
4604-
unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum;
4605-
noway_assert(compiler->lvaSIMDInitTempVarNum != BAD_VAR_NUM);
4635+
assert(!op2->isContained());
46064636

4637+
regNumber baseReg = REG_NA;
46074638
regNumber indexReg = op2->gtRegNum;
4608-
regNumber tmpReg = simdNode->ExtractTempReg();
46094639

4610-
assert(genIsValidIntReg(tmpReg));
4611-
assert(tmpReg != indexReg);
4640+
if (op1->isContained())
4641+
{
4642+
// Optimize the case where op1 is in memory and we are accessing the ith element.
4643+
assert(!op1->isUsedFromReg());
4644+
if (op1->OperIsLocal())
4645+
{
4646+
unsigned varNum = op1->gtLclVarCommon.gtLclNum;
4647+
4648+
baseReg = simdNode->ExtractTempReg();
4649+
4650+
// Load the address of varNum
4651+
getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, varNum, 0);
4652+
}
4653+
else
4654+
{
4655+
// Require GT_IND addr to be not contained.
4656+
assert(op1->OperGet() == GT_IND);
4657+
4658+
GenTree* addr = op1->AsIndir()->Addr();
4659+
assert(!addr->isContained());
46124660

4613-
unsigned baseTypeScale = genLog2(EA_SIZE_IN_BYTES(baseTypeSize));
4661+
baseReg = addr->gtRegNum;
4662+
}
4663+
}
4664+
else
4665+
{
4666+
assert(op1->isUsedFromReg());
4667+
regNumber srcReg = op1->gtRegNum;
46144668

4615-
// Load the address of simdInitTempVarNum
4616-
getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, tmpReg, simdInitTempVarNum, 0);
4669+
unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum;
4670+
noway_assert(compiler->lvaSIMDInitTempVarNum != BAD_VAR_NUM);
4671+
4672+
baseReg = simdNode->ExtractTempReg();
4673+
4674+
// Load the address of simdInitTempVarNum
4675+
getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, simdInitTempVarNum, 0);
4676+
4677+
// Store the vector to simdInitTempVarNum
4678+
getEmitter()->emitIns_R_R(INS_str, emitTypeSize(simdType), srcReg, baseReg);
4679+
}
46174680

4618-
// Store the vector to simdInitTempVarNum
4619-
getEmitter()->emitIns_R_R(INS_str, emitTypeSize(simdType), srcReg, tmpReg);
4681+
assert(genIsValidIntReg(indexReg));
4682+
assert(genIsValidIntReg(baseReg));
4683+
assert(baseReg != indexReg);
46204684

4621-
// Load item at simdInitTempVarNum[index]
4622-
getEmitter()->emitIns_R_R_R_Ext(ins_Load(baseType), baseTypeSize, targetReg, tmpReg, indexReg, INS_OPTS_LSL,
4685+
// Load item at baseReg[index]
4686+
getEmitter()->emitIns_R_R_R_Ext(ins_Load(baseType), baseTypeSize, targetReg, baseReg, indexReg, INS_OPTS_LSL,
46234687
baseTypeScale);
46244688
}
46254689

src/jit/lowerarmarch.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -785,19 +785,27 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)
785785

786786
case SIMDIntrinsicGetItem:
787787
{
788-
// TODO-ARM64-CQ Support containing op1 memory ops
789-
790788
// This implements get_Item method. The sources are:
791789
// - the source SIMD struct
792790
// - index (which element to get)
793791
// The result is baseType of SIMD struct.
792+
op1 = simdNode->gtOp.gtOp1;
794793
op2 = simdNode->gtOp.gtOp2;
795794

796795
// If the index is a constant, mark it as contained.
797796
if (op2->IsCnsIntOrI())
798797
{
799798
MakeSrcContained(simdNode, op2);
800799
}
800+
801+
if (IsContainableMemoryOp(op1))
802+
{
803+
MakeSrcContained(simdNode, op1);
804+
if (op1->OperGet() == GT_IND)
805+
{
806+
op1->AsIndir()->Addr()->ClearContained();
807+
}
808+
}
801809
break;
802810
}
803811

src/jit/lsraarm64.cpp

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -815,20 +815,38 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
815815
break;
816816

817817
case SIMDIntrinsicGetItem:
818+
op1 = simdTree->gtGetOp1();
819+
op2 = simdTree->gtGetOp2();
820+
818821
// We have an object and an item, which may be contained.
819-
info->srcCount = simdTree->gtGetOp2()->isContained() ? 1 : 2;
822+
info->srcCount = (op2->isContained() ? 1 : 2);
820823

821-
if (!simdTree->gtGetOp2()->IsCnsIntOrI())
824+
if (op1->isContained())
822825
{
823-
// If the index is not a constant, we will need a general purpose register
824-
info->internalIntCount = 1;
826+
// Although GT_IND of TYP_SIMD12 reserves an internal register for reading 4 and 8 bytes from memory
827+
// and assembling them into target reg, it is not required in this case.
828+
op1->gtLsraInfo.internalIntCount = 0;
829+
op1->gtLsraInfo.internalFloatCount = 0;
830+
info->srcCount -= 1;
831+
info->srcCount += GetOperandSourceCount(op1);
832+
}
825833

826-
// If the index is not a constant, we will use the SIMD temp location to store the vector.
827-
compiler->getSIMDInitTempVarNum();
834+
if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal()))
835+
{
836+
// If the index is not a constant and op1 is either not contained or is a local,
837+
// we will need a general purpose register to calculate the address
838+
info->internalIntCount = 1;
828839

829840
// internal register must not clobber input index
830-
simdTree->gtOp.gtOp2->gtLsraInfo.isDelayFree = true;
831-
info->hasDelayFreeSrc = true;
841+
op2->gtLsraInfo.isDelayFree = true;
842+
info->hasDelayFreeSrc = true;
843+
}
844+
845+
if (!op2->IsCnsIntOrI() && (!op1->isContained()))
846+
{
847+
// If vector is not already in memory (contained) and the index is not a constant,
848+
// we will use the SIMD temp location to store the vector.
849+
compiler->getSIMDInitTempVarNum();
832850
}
833851
break;
834852

0 commit comments

Comments
 (0)