Skip to content

Commit

Permalink
[NFC] Introduce a type to model memory operation
Browse files Browse the repository at this point in the history
Summary: This is a first step before changing the types to llvm::Align and introducing functions to ease client code.

Reviewers: courbet

Subscribers: arsenm, sdardis, nemanjai, jvesely, nhaehnle, hiraditya, kbarton, jrtc27, atanasyan, jsji, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73785
  • Loading branch information
gchatelet committed Jan 31, 2020
1 parent edc3f4f commit 3c89b75
Show file tree
Hide file tree
Showing 20 changed files with 164 additions and 223 deletions.
73 changes: 49 additions & 24 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Expand Up @@ -106,6 +106,49 @@ namespace Sched {

} // end namespace Sched

// MemOp describes a single memory operation: either a memset or a
// memcpy/memmove. Build instances through the Copy/Set factories below.
struct MemOp {
// Fields shared by every kind of operation.
uint64_t Size;
unsigned DstAlign; // Specified alignment of the memory operation, or zero
                   // if the destination alignment can satisfy any
                   // constraint.
bool AllowOverlap;
// memset-specific fields.
bool IsMemset;   // If set, this memory operation is a memset.
bool ZeroMemset; // If set, clears out memory with zeros.
// memcpy-specific fields.
bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register
                   // constant so it does not need to be loaded.
unsigned SrcAlign; // Inferred alignment of the source, or zero if the memory
                   // operation does not need to load the value.

// Describes a memcpy/memmove. A volatile operation may not be split into
// overlapping accesses, hence AllowOverlap = !IsVolatile.
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, unsigned DstAlign,
                  unsigned SrcAlign, bool IsVolatile,
                  bool MemcpyStrSrc = false) {
  MemOp Op;
  Op.Size = Size;
  // A changeable destination alignment is encoded as zero.
  Op.DstAlign = DstAlignCanChange ? 0 : DstAlign;
  Op.AllowOverlap = !IsVolatile;
  Op.IsMemset = false;
  Op.ZeroMemset = false;
  Op.MemcpyStrSrc = MemcpyStrSrc;
  Op.SrcAlign = SrcAlign;
  return Op;
}
// Describes a memset. A memset never loads a source value, so SrcAlign and
// MemcpyStrSrc are cleared.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, unsigned DstAlign,
                 bool IsZeroMemset, bool IsVolatile) {
  MemOp Op;
  Op.Size = Size;
  // A changeable destination alignment is encoded as zero.
  Op.DstAlign = DstAlignCanChange ? 0 : DstAlign;
  Op.AllowOverlap = !IsVolatile;
  Op.IsMemset = true;
  Op.ZeroMemset = IsZeroMemset;
  Op.MemcpyStrSrc = false;
  Op.SrcAlign = 0;
  return Op;
}
};

/// This base class for TargetLowering contains the SelectionDAG-independent
/// parts that can be used from the rest of CodeGen.
class TargetLoweringBase {
Expand Down Expand Up @@ -1518,29 +1561,17 @@ class TargetLoweringBase {

/// Returns the target specific optimal type for load and store operations as
/// a result of memset, memcpy, and memmove lowering.
///
/// If DstAlign is zero that means it's safe to destination alignment can
/// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't
/// a need to check it against alignment requirement, probably because the
/// source does not need to be loaded. If 'IsMemset' is true, that means it's
/// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
/// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
/// does not need to be loaded. It returns EVT::Other if the type should be
/// determined using generic target-independent logic.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
virtual EVT
getOptimalMemOpType(uint64_t /*Size*/, unsigned /*DstAlign*/,
unsigned /*SrcAlign*/, bool /*IsMemset*/,
bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
getOptimalMemOpType(const MemOp &Op,
const AttributeList & /*FuncAttributes*/) const {
return MVT::Other;
}


/// LLT returning variant.
virtual LLT
getOptimalMemOpLLT(uint64_t /*Size*/, unsigned /*DstAlign*/,
unsigned /*SrcAlign*/, bool /*IsMemset*/,
bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
getOptimalMemOpLLT(const MemOp &Op,
const AttributeList & /*FuncAttributes*/) const {
return LLT();
}
Expand Down Expand Up @@ -3102,14 +3133,8 @@ class TargetLowering : public TargetLoweringBase {
/// Return true if the number of memory ops is below the threshold (Limit).
/// It returns the types of the sequence of memory ops to perform
/// memset / memcpy by reference.
bool findOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned Limit, uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool IsMemset,
bool ZeroMemset,
bool MemcpyStrSrc,
bool AllowOverlap,
unsigned DstAS, unsigned SrcAS,
bool findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
const MemOp &Op, unsigned DstAS, unsigned SrcAS,
const AttributeList &FuncAttributes) const;

/// Check to see if the specified operand of the specified instruction is a
Expand Down
66 changes: 29 additions & 37 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Expand Up @@ -855,37 +855,30 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {

// Returns a list of types to use for memory op lowering in MemOps. A partial
// port of findOptimalMemOpLowering in TargetLowering.
static bool findGISelOptimalMemOpLowering(
std::vector<LLT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign,
unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
bool AllowOverlap, unsigned DstAS, unsigned SrcAS,
const AttributeList &FuncAttributes, const TargetLowering &TLI) {
// If 'SrcAlign' is zero, that means the memory operation does not need to
// load the value, i.e. memset or memcpy from constant string. Otherwise,
// it's the inferred alignment of the source. 'DstAlign', on the other hand,
// is the specified alignment of the memory operation. If it is zero, that
// means it's possible to change the alignment of the destination.
// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
// not need to be loaded.
if (SrcAlign != 0 && SrcAlign < DstAlign)
static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
unsigned Limit, const MemOp &Op,
unsigned DstAS, unsigned SrcAS,
const AttributeList &FuncAttributes,
const TargetLowering &TLI) {
if (Op.SrcAlign != 0 && Op.SrcAlign < Op.DstAlign)
return false;

LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset,
ZeroMemset, MemcpyStrSrc, FuncAttributes);
LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);

if (Ty == LLT()) {
// Use the largest scalar type whose alignment constraints are satisfied.
// We only need to check DstAlign here as SrcAlign is always greater or
// equal to DstAlign (or zero).
Ty = LLT::scalar(64);
while (DstAlign && DstAlign < Ty.getSizeInBytes() &&
!TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign))
while (Op.DstAlign && Op.DstAlign < Ty.getSizeInBytes() &&
!TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.DstAlign))
Ty = LLT::scalar(Ty.getSizeInBytes());
assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
// FIXME: check for the largest legal type we can load/store to.
}

unsigned NumMemOps = 0;
auto Size = Op.Size;
while (Size != 0) {
unsigned TySize = Ty.getSizeInBytes();
while (TySize > Size) {
Expand All @@ -904,9 +897,9 @@ static bool findGISelOptimalMemOpLowering(
bool Fast;
// Need to get a VT equivalent for allowMisalignedMemoryAccesses().
MVT VT = getMVTForLLT(Ty);
if (NumMemOps && AllowOverlap && NewTySize < Size &&
if (NumMemOps && Op.AllowOverlap && NewTySize < Size &&
TLI.allowsMisalignedMemoryAccesses(
VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) &&
VT, DstAS, Op.DstAlign, MachineMemOperand::MONone, &Fast) &&
Fast)
TySize = Size;
else {
Expand Down Expand Up @@ -988,12 +981,13 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val
auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;

if (!findGISelOptimalMemOpLowering(
MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), 0,
/*IsMemset=*/true,
/*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
/*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u,
MF.getFunction().getAttributes(), TLI))
if (!findGISelOptimalMemOpLowering(MemOps, Limit,
MemOp::Set(KnownLen, DstAlignCanChange,
Align,
/*IsZeroMemset=*/IsZeroVal,
/*IsVolatile=*/IsVolatile),
DstPtrInfo.getAddrSpace(), ~0u,
MF.getFunction().getAttributes(), TLI))
return false;

if (DstAlignCanChange) {
Expand Down Expand Up @@ -1107,12 +1101,11 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

if (!findGISelOptimalMemOpLowering(
MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
SrcAlign,
/*IsMemset=*/false,
/*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
/*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(),
SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
MemOps, Limit,
MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
IsVolatile),
DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
MF.getFunction().getAttributes(), TLI))
return false;

if (DstAlignCanChange) {
Expand Down Expand Up @@ -1214,12 +1207,11 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
// to a bug in its findOptimalMemOpLowering implementation. For now do the
// same thing here.
if (!findGISelOptimalMemOpLowering(
MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
SrcAlign,
/*IsMemset=*/false,
/*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
/*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(),
SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
MemOps, Limit,
MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
/*IsVolatile*/ true),
DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
MF.getFunction().getAttributes(), TLI))
return false;

if (DstAlignCanChange) {
Expand Down
30 changes: 12 additions & 18 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Expand Up @@ -5908,13 +5908,12 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
bool CopyFromConstant = isMemSrcFromConstant(Src, Slice);
bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);

if (!TLI.findOptimalMemOpLowering(
MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment),
(isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false,
/*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant,
/*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(),
SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))
MemOps, Limit,
MemOp::Copy(Size, DstAlignCanChange, Alignment,
isZeroConstant ? 0 : SrcAlign, isVol, CopyFromConstant),
DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
MF.getFunction().getAttributes()))
return SDValue();

if (DstAlignCanChange) {
Expand Down Expand Up @@ -6088,14 +6087,11 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (Align > SrcAlign)
SrcAlign = Align;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
// FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in
// findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the
// correct code.
bool AllowOverlap = false;
if (!TLI.findOptimalMemOpLowering(
MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), SrcAlign,
/*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
MemOps, Limit,
MemOp::Copy(Size, DstAlignCanChange, Align, SrcAlign,
/*IsVolatile*/ true),
DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
MF.getFunction().getAttributes()))
return SDValue();

Expand Down Expand Up @@ -6193,11 +6189,9 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
bool IsZeroVal =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
if (!TLI.findOptimalMemOpLowering(
MemOps, TLI.getMaxStoresPerMemset(OptSize), Size,
(DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true,
/*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
/*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u,
MF.getFunction().getAttributes()))
MemOps, TLI.getMaxStoresPerMemset(OptSize),
MemOp::Set(Size, DstAlignCanChange, Align, IsZeroVal, isVol),
DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes()))
return SDValue();

if (DstAlignCanChange) {
Expand Down
28 changes: 10 additions & 18 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Expand Up @@ -176,37 +176,28 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
return LowerCallTo(CLI);
}

bool
TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned Limit, uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool IsMemset,
bool ZeroMemset,
bool MemcpyStrSrc,
bool AllowOverlap,
unsigned DstAS, unsigned SrcAS,
const AttributeList &FuncAttributes) const {
bool TargetLowering::findOptimalMemOpLowering(
std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
unsigned SrcAS, const AttributeList &FuncAttributes) const {
// If 'SrcAlign' is zero, that means the memory operation does not need to
// load the value, i.e. memset or memcpy from constant string. Otherwise,
// it's the inferred alignment of the source. 'DstAlign', on the other hand,
// is the specified alignment of the memory operation. If it is zero, that
// means it's possible to change the alignment of the destination.
// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
// not need to be loaded.
if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
if (!(Op.SrcAlign == 0 || Op.SrcAlign >= Op.DstAlign))
return false;

EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
IsMemset, ZeroMemset, MemcpyStrSrc,
FuncAttributes);
EVT VT = getOptimalMemOpType(Op, FuncAttributes);

if (VT == MVT::Other) {
// Use the largest integer type whose alignment constraints are satisfied.
// We only need to check DstAlign here as SrcAlign is always greater or
// equal to DstAlign (or zero).
VT = MVT::i64;
while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
!allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
while (Op.DstAlign && Op.DstAlign < VT.getSizeInBits() / 8 &&
!allowsMisalignedMemoryAccesses(VT, DstAS, Op.DstAlign))
VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
assert(VT.isInteger());

Expand All @@ -223,6 +214,7 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
}

unsigned NumMemOps = 0;
auto Size = Op.Size;
while (Size != 0) {
unsigned VTSize = VT.getSizeInBits() / 8;
while (VTSize > Size) {
Expand Down Expand Up @@ -257,8 +249,8 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
// If the new VT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.
bool Fast;
if (NumMemOps && AllowOverlap && NewVTSize < Size &&
allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
if (NumMemOps && Op.AllowOverlap && NewVTSize < Size &&
allowsMisalignedMemoryAccesses(VT, DstAS, Op.DstAlign,
MachineMemOperand::MONone, &Fast) &&
Fast)
VTSize = Size;
Expand Down

0 comments on commit 3c89b75

Please sign in to comment.