Skip to content

Commit

Permalink
[X86] Lower vXi8 multiplies using PMADDUBSW on SSSE3+ targets (llvm#9…
Browse files Browse the repository at this point in the history
…5690)

Extends llvm#95403 to handle non-constant cases - we can avoid unpacks/extensions from vXi8 to vXi16 by using PMADDUBSW instead and truncating the vXi16 results back together.

Most targets benefit from performing this for non-constant cases - its just Intel Core/SandyBridge era CPUs that might experience additional Port0/15 contention (but lower instruction count).

Fixes llvm#90748
  • Loading branch information
RKSimon committed Jun 25, 2024
1 parent 9acb533 commit a46a2c2
Show file tree
Hide file tree
Showing 18 changed files with 953 additions and 947 deletions.
22 changes: 12 additions & 10 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28503,17 +28503,19 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,

MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);

// For vXi8 mul-by-constant, try PMADDUBSW to avoid the need for extension.
// For vXi8 mul, try PMADDUBSW to avoid the need for extension.
// Don't do this if we only need to unpack one half.
if (Subtarget.hasSSSE3() &&
ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
bool IsLoLaneAllZeroOrUndef = true;
bool IsHiLaneAllZeroOrUndef = true;
for (auto [Idx, Val] : enumerate(B->ops())) {
if ((Idx % NumEltsPerLane) >= (NumEltsPerLane / 2))
IsHiLaneAllZeroOrUndef &= isNullConstantOrUndef(Val);
else
IsLoLaneAllZeroOrUndef &= isNullConstantOrUndef(Val);
if (Subtarget.hasSSSE3()) {
bool BIsBuildVector = isa<BuildVectorSDNode>(B);
bool IsLoLaneAllZeroOrUndef = BIsBuildVector;
bool IsHiLaneAllZeroOrUndef = BIsBuildVector;
if (BIsBuildVector) {
for (auto [Idx, Val] : enumerate(B->ops())) {
if ((Idx % NumEltsPerLane) >= (NumEltsPerLane / 2))
IsHiLaneAllZeroOrUndef &= isNullConstantOrUndef(Val);
else
IsLoLaneAllZeroOrUndef &= isNullConstantOrUndef(Val);
}
}
if (!(IsLoLaneAllZeroOrUndef || IsHiLaneAllZeroOrUndef)) {
SDValue Mask = DAG.getBitcast(VT, DAG.getConstant(0x00FF, dl, ExVT));
Expand Down
20 changes: 14 additions & 6 deletions llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -852,8 +852,8 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
{ ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } }, // psubw

{ ISD::MUL, MVT::v16i8, { 4, 12, 4, 5 } }, // extend/pmullw/trunc
{ ISD::MUL, MVT::v32i8, { 6, 11,10,11 } }, // extend/pmullw/trunc
{ ISD::MUL, MVT::v64i8, { 6, 12,10,11 } }, // unpack/pmullw
{ ISD::MUL, MVT::v32i8, { 3, 10, 7,10 } }, // pmaddubsw
{ ISD::MUL, MVT::v64i8, { 3, 11, 7,10 } }, // pmaddubsw
{ ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } }, // pmullw

{ ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } }, // psubb
Expand Down Expand Up @@ -1119,7 +1119,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
{ ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } }, // paddq

{ ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } }, // extend/pmullw/pack
{ ISD::MUL, MVT::v32i8, { 6, 11,10,20 } }, // unpack/pmullw
{ ISD::MUL, MVT::v32i8, { 4, 8, 8,16 } }, // pmaddubsw
{ ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } }, // pmullw
{ ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } }, // pmulld
{ ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } }, // pmulld
Expand Down Expand Up @@ -1170,8 +1170,8 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
// We don't have to scalarize unsupported ops. We can issue two half-sized
// operations and we only need to extract the upper YMM half.
// Two ops + 1 extract + 1 insert = 4.
{ ISD::MUL, MVT::v32i8, { 12, 12, 22, 23 } }, // unpack/pmullw + split
{ ISD::MUL, MVT::v16i8, { 5, 6, 10, 12 } }, // unpack/pmullw
{ ISD::MUL, MVT::v32i8, { 10, 11, 18, 19 } }, // pmaddubsw + split
{ ISD::MUL, MVT::v16i8, { 5, 6, 8, 12 } }, // 2*pmaddubsw/3*and/psllw/or
{ ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } }, // pmullw + split
{ ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } }, // pmulld + split
{ ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } }, // pmulld
Expand Down Expand Up @@ -1311,7 +1311,6 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v4i32, { 16, 17,15,19 } }, // Shift each lane + blend.
{ ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } }, // splat+shuffle sequence.

{ ISD::MUL, MVT::v16i8, { 6, 18,10,12 } }, // 2*unpack/2*pmullw/2*and/pack
{ ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } } // pmulld (Nehalem from agner.org)
};

Expand All @@ -1320,6 +1319,15 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
if (auto KindCost = Entry->Cost[CostKind])
return LT.first * *KindCost;

static const CostKindTblEntry SSSE3CostTable[] = {
{ ISD::MUL, MVT::v16i8, { 5, 18,10,12 } }, // 2*pmaddubsw/3*and/psllw/or
};

if (ST->hasSSSE3())
if (const auto *Entry = CostTableLookup(SSSE3CostTable, ISD, LT.second))
if (auto KindCost = Entry->Cost[CostKind])
return LT.first * *KindCost;

static const CostKindTblEntry SSE2CostTable[] = {
// We don't correctly identify costs of casts because they are marked as
// custom.
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/Analysis/CostModel/X86/arith-int-codesize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -791,9 +791,9 @@ define i32 @mul(i32 %arg) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = mul <2 x i8> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = mul <32 x i8> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = mul <64 x i8> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = mul <16 x i8> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = mul <32 x i8> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = mul <64 x i8> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SSE42-LABEL: 'mul'
Expand Down Expand Up @@ -835,9 +835,9 @@ define i32 @mul(i32 %arg) {
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = mul <2 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX2-LABEL: 'mul'
Expand All @@ -858,8 +858,8 @@ define i32 @mul(i32 %arg) {
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'mul'
Expand All @@ -880,7 +880,7 @@ define i32 @mul(i32 %arg) {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
Expand All @@ -902,8 +902,8 @@ define i32 @mul(i32 %arg) {
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'mul'
Expand All @@ -924,7 +924,7 @@ define i32 @mul(i32 %arg) {
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/Analysis/CostModel/X86/arith-int-latency.ll
Original file line number Diff line number Diff line change
Expand Up @@ -680,8 +680,8 @@ define i32 @mul(i32 %arg) {
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = mul <4 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = mul <8 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX2-LABEL: 'mul'
Expand All @@ -702,8 +702,8 @@ define i32 @mul(i32 %arg) {
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = mul <4 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = mul <8 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'mul'
Expand All @@ -724,7 +724,7 @@ define i32 @mul(i32 %arg) {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = mul <4 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = mul <8 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
Expand All @@ -746,8 +746,8 @@ define i32 @mul(i32 %arg) {
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = mul <4 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = mul <8 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'mul'
Expand All @@ -768,7 +768,7 @@ define i32 @mul(i32 %arg) {
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = mul <4 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = mul <8 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/Analysis/CostModel/X86/arith-int-sizelatency.ll
Original file line number Diff line number Diff line change
Expand Up @@ -680,8 +680,8 @@ define i32 @mul(i32 %arg) {
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX2-LABEL: 'mul'
Expand All @@ -702,8 +702,8 @@ define i32 @mul(i32 %arg) {
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'mul'
Expand All @@ -724,7 +724,7 @@ define i32 @mul(i32 %arg) {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
Expand All @@ -746,8 +746,8 @@ define i32 @mul(i32 %arg) {
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'mul'
Expand All @@ -768,7 +768,7 @@ define i32 @mul(i32 %arg) {
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
Expand Down
Loading

0 comments on commit a46a2c2

Please sign in to comment.