From 366e62a0cb5d1c94d3b281f094755c4dd4c76df9 Mon Sep 17 00:00:00 2001 From: abhishek-kaushik22 Date: Wed, 8 Jan 2025 00:49:29 -0800 Subject: [PATCH 1/8] [X86] Combine `uitofp to ` (#121809) Closes #121793 --- .../SelectionDAG/LegalizeVectorOps.cpp | 25 +++ llvm/test/CodeGen/X86/uint_to_half.ll | 198 ++++++++++++++++++ 2 files changed, 223 insertions(+) create mode 100644 llvm/test/CodeGen/X86/uint_to_half.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index e8404a13009a7..89a00c5a4f043 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1777,6 +1777,31 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, assert((BW == 64 || BW == 32) && "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); + // If STRICT_/FMUL is not supported by the target (in case of f16) replace the + // UINT_TO_FP with a larger float and round to the smaller type + if ((!IsStrict && !TLI.isOperationLegalOrCustom(ISD::FMUL, DstVT)) || + (IsStrict && !TLI.isOperationLegalOrCustom(ISD::STRICT_FMUL, DstVT))) { + EVT FPVT = BW == 32 ? MVT::f32 : MVT::f64; + SDValue UIToFP; + SDValue Result; + SDValue TargetZero = DAG.getIntPtrConstant(0, DL, /*isTarget=*/true); + EVT FloatVecVT = SrcVT.changeVectorElementType(FPVT); + if (IsStrict) { + UIToFP = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {FloatVecVT, MVT::Other}, + {Node->getOperand(0), Src}); + Result = DAG.getNode(ISD::STRICT_FP_ROUND, DL, {DstVT, MVT::Other}, + {Node->getOperand(0), UIToFP, TargetZero}); + Results.push_back(Result); + Results.push_back(Result.getValue(1)); + } else { + UIToFP = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVecVT, Src); + Result = DAG.getNode(ISD::FP_ROUND, DL, DstVT, UIToFP, TargetZero); + Results.push_back(Result); + } + + return; + } + SDValue HalfWord = DAG.getConstant(BW / 2, DL, SrcVT); // Constants to clear the upper part of the word. 
diff --git a/llvm/test/CodeGen/X86/uint_to_half.ll b/llvm/test/CodeGen/X86/uint_to_half.ll new file mode 100644 index 0000000000000..b62a07eec1ce6 --- /dev/null +++ b/llvm/test/CodeGen/X86/uint_to_half.ll @@ -0,0 +1,198 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx,+f16c | FileCheck %s -check-prefixes=AVX1 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+f16c | FileCheck %s -check-prefixes=AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s -check-prefixes=AVX512 + +define <8 x half> @test_uitofp_v8i32_v8f16(<8 x i32> %a) { +; AVX1-LABEL: test_uitofp_v8i32_v8f16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1 +; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX1-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_uitofp_v8i32_v8f16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200] +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] +; AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] +; AVX2-NEXT: vsubps %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_uitofp_v8i32_v8f16: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtudq2ps %ymm0, %ymm0 +; AVX512-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %vec = uitofp <8 x i32> %a to <8 x half> + ret <8 x half> %vec +} + +define <8 x half> @test_strict_uitofp_v8i32_v8f16(<8 x i32> %a) { +; AVX1-LABEL: test_strict_uitofp_v8i32_v8f16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1 +; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX1-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_strict_uitofp_v8i32_v8f16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200] +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; AVX2-NEXT: 
vpsrld $16, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] +; AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] +; AVX2-NEXT: vsubps %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_strict_uitofp_v8i32_v8f16: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtudq2ps %ymm0, %ymm0 +; AVX512-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %vec = tail call <8 x half> @llvm.experimental.constrained.uitofp.f16.i32(<8 x i32> %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <8 x half> %vec +} + +define <16 x half> @test_uitofp_v16i32_v16f16(<16 x i32> %a) { +; AVX1-LABEL: test_uitofp_v16i32_v16f16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; AVX1-NEXT: vcvtdq2ps %ymm2, %ymm2 +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4] +; AVX1-NEXT: vmulps %ymm3, %ymm2, %ymm2 +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm4 = [65535,65535,65535,65535,65535,65535,65535,65535] +; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0 +; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX1-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; AVX1-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX1-NEXT: vpsrld $16, %xmm1, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 +; AVX1-NEXT: vpsrld $16, %xmm5, %xmm5 +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm2, %ymm2 +; AVX1-NEXT: vcvtdq2ps %ymm2, %ymm2 +; AVX1-NEXT: vmulps %ymm3, %ymm2, %ymm2 +; AVX1-NEXT: vandps %ymm4, %ymm1, %ymm1 +; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1 +; AVX1-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; AVX1-NEXT: vcvtps2ph $4, %ymm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_uitofp_v16i32_v16f16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200] +; AVX2-NEXT: vpblendw {{.*#+}} ymm3 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] +; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm4 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm4[1],ymm0[2],ymm4[3],ymm0[4],ymm4[5],ymm0[6],ymm4[7],ymm0[8],ymm4[9],ymm0[10],ymm4[11],ymm0[12],ymm4[13],ymm0[14],ymm4[15] +; AVX2-NEXT: vbroadcastss {{.*#+}} ymm5 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] +; AVX2-NEXT: vsubps %ymm5, %ymm0, %ymm0 +; AVX2-NEXT: vaddps %ymm0, %ymm3, %ymm0 +; AVX2-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7],ymm1[8],ymm2[9],ymm1[10],ymm2[11],ymm1[12],ymm2[13],ymm1[14],ymm2[15] +; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = 
ymm1[0],ymm4[1],ymm1[2],ymm4[3],ymm1[4],ymm4[5],ymm1[6],ymm4[7],ymm1[8],ymm4[9],ymm1[10],ymm4[11],ymm1[12],ymm4[13],ymm1[14],ymm4[15] +; AVX2-NEXT: vsubps %ymm5, %ymm1, %ymm1 +; AVX2-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vcvtps2ph $4, %ymm1, %xmm1 +; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_uitofp_v16i32_v16f16: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0 +; AVX512-NEXT: vcvtps2ph $4, %zmm0, %ymm0 +; AVX512-NEXT: retq + %vec = uitofp <16 x i32> %a to <16 x half> + ret <16 x half> %vec +} + +define <16 x half> @test_strict_uitofp_v16i32_v16f16(<16 x i32> %a) { +; AVX1-LABEL: test_strict_uitofp_v16i32_v16f16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; AVX1-NEXT: vcvtdq2ps %ymm2, %ymm2 +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4] +; AVX1-NEXT: vmulps %ymm3, %ymm2, %ymm2 +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm4 = [65535,65535,65535,65535,65535,65535,65535,65535] +; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0 +; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX1-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; AVX1-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX1-NEXT: vpsrld $16, %xmm1, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 +; AVX1-NEXT: vpsrld $16, %xmm5, %xmm5 +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm2, %ymm2 +; AVX1-NEXT: vcvtdq2ps %ymm2, %ymm2 +; AVX1-NEXT: vmulps %ymm3, %ymm2, %ymm2 +; AVX1-NEXT: vandps %ymm4, %ymm1, %ymm1 +; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1 +; AVX1-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; AVX1-NEXT: vcvtps2ph $4, %ymm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_strict_uitofp_v16i32_v16f16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200] +; AVX2-NEXT: vpblendw {{.*#+}} ymm3 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] +; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm4 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm4[1],ymm0[2],ymm4[3],ymm0[4],ymm4[5],ymm0[6],ymm4[7],ymm0[8],ymm4[9],ymm0[10],ymm4[11],ymm0[12],ymm4[13],ymm0[14],ymm4[15] +; AVX2-NEXT: vbroadcastss {{.*#+}} ymm5 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] +; AVX2-NEXT: vsubps %ymm5, %ymm0, %ymm0 +; AVX2-NEXT: vaddps %ymm0, %ymm3, %ymm0 +; AVX2-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7],ymm1[8],ymm2[9],ymm1[10],ymm2[11],ymm1[12],ymm2[13],ymm1[14],ymm2[15] +; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm4[1],ymm1[2],ymm4[3],ymm1[4],ymm4[5],ymm1[6],ymm4[7],ymm1[8],ymm4[9],ymm1[10],ymm4[11],ymm1[12],ymm4[13],ymm1[14],ymm4[15] +; AVX2-NEXT: vsubps %ymm5, %ymm1, %ymm1 +; AVX2-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vcvtps2ph $4, %ymm1, %xmm1 +; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_strict_uitofp_v16i32_v16f16: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0 +; AVX512-NEXT: vcvtps2ph $4, %zmm0, 
%ymm0 +; AVX512-NEXT: retq + %vec = tail call <16 x half> @llvm.experimental.constrained.uitofp.f16.i32(<16 x i32> %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <16 x half> %vec +} From e5341784dbcc9e166827233a66fb54645204a43e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 8 Jan 2025 09:50:04 +0100 Subject: [PATCH 2/8] [LLVM] Update inlining maintainers (#120579) Update maintainers for inlining, and add section for NewPM/CGSCC. --- llvm/Maintainers.md | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/llvm/Maintainers.md b/llvm/Maintainers.md index 8d86ac45f3d05..2ccf30b8139aa 100644 --- a/llvm/Maintainers.md +++ b/llvm/Maintainers.md @@ -45,8 +45,12 @@ Matthew.Arsenault@amd.com, arsenm2@gmail.com (email), [arsenm](https://github.co #### Inlining -Chandler Carruth \ -chandlerc@gmail.com, chandlerc@google.com (email), [chandlerc](https://github.com/chandlerc) (GitHub) +Arthur Eubanks \ +aeubanks@google.com (email), [aeubanks](https://github.com/aeubanks) (GitHub) \ +Mircea Trofin (esp. ML inliner) \ +mtrofin@google.com (email), [mtrofin](https://github.com/mtrofin) (GitHub) \ +Kazu Hirata (esp. module inliner and inline order) \ +kazu@google.com (email), [kazutakahirata](https://github.com/kazutakahirata) (GitHub) #### InstCombine, InstSimplify, ValueTracking, ConstantFold @@ -65,6 +69,11 @@ mail@justinbogner.com (email), [bogner](https://github.com/bogner) (GitHub) Diego Novillo \ dnovillo@google.com (email), [dnovillo](https://github.com/dnovillo) (GitHub) +#### New pass manager, CGSCC, LazyCallGraph + +Arthur Eubanks \ +aeubanks@google.com (email), [aeubanks](https://github.com/aeubanks) (GitHub) + #### LoopStrengthReduce Quentin Colombet \ @@ -462,7 +471,7 @@ sabre@nondot.org (email), [lattner](https://github.com/lattner) (GitHub), clattn Paul C. Anagnostopoulos (paul@windfall.com, [Paul-C-Anagnostopoulos](https://github.com/Paul-C-Anagnostopoulos)) -- TableGen \ Justin Bogner (mail@justinbogner.com, [bogner](https://github.com/bogner)) -- SelectionDAG \ -Chandler Carruth (chandlerc@gmail.com, chandlerc@google.com, [chandlerc](https://github.com/chandlerc)) -- ADT, Support \ +Chandler Carruth (chandlerc@gmail.com, chandlerc@google.com, [chandlerc](https://github.com/chandlerc)) -- ADT, Support, Inlining \ Peter Collingbourne (peter@pcc.me.uk, [pcc](https://github.com/pcc)) -- LTO \ Evan Cheng (evan.cheng@apple.com) -- Parts of code generator not covered by someone else \ Jake Ehrlich (jakehehrlich@google.com, [jakehehrlich](https://github.com/jakehehrlich)) -- llvm-objcopy and ObjCopy library \ From 70ab81bc749d0ad67362e612dbb6429ed00a47ec Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Wed, 8 Jan 2025 08:54:48 +0000 Subject: [PATCH 3/8] [AArch64] Ensure APAS instruction passes register parameter (#121928) In PR #112341, the `APAS` instruction was added as part of the Armv9.6-A specification, but it didn't take the Xt register parameter. This change fixes this. 
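
For illustration, a worked encoding example (all values are taken from the updated MC tests below; nothing here goes beyond what this patch changes): APAS disassembles as the SYS alias `sys #6, c7, c0, #0, Xt`, and with Xt now routed into Inst{4-0} the register number is simply placed in the low five bits of the base encoding 0xd50e7000:

  apas x0   ->  0xd50e7000 | 0   =  0xd50e7000   ([0x00,0x70,0x0e,0xd5])
  apas x17  ->  0xd50e7000 | 17  =  0xd50e7011   ([0x11,0x70,0x0e,0xd5])
  apas x30  ->  0xd50e7000 | 30  =  0xd50e701e   ([0x1e,0x70,0x0e,0xd5])

Before this change the Xt field was not encoded at all, so every APAS assembled to 0xd50e701f regardless of the register operand.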
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td | 2 ++ llvm/test/MC/AArch64/armv9.6a-rme-gpc3.s | 10 +++++++++- .../MC/Disassembler/AArch64/armv9.6a-rme-gpc3.txt | 12 ++++++++++-- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 47c4c6c39565f..b6fabdb7db59d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1804,7 +1804,9 @@ class TMSystemException op1, string asm, list pattern> } class APASI : SimpleSystemI<0, (ins GPR64:$Xt), "apas", "\t$Xt">, Sched<[]> { + bits<5> Xt; let Inst{20-5} = 0b0111001110000000; + let Inst{4-0} = Xt; let DecoderNamespace = "APAS"; } diff --git a/llvm/test/MC/AArch64/armv9.6a-rme-gpc3.s b/llvm/test/MC/AArch64/armv9.6a-rme-gpc3.s index baf05f10b9a1e..093101b6cd812 100644 --- a/llvm/test/MC/AArch64/armv9.6a-rme-gpc3.s +++ b/llvm/test/MC/AArch64/armv9.6a-rme-gpc3.s @@ -2,10 +2,18 @@ // RUN: llvm-mc -triple aarch64 -show-encoding %s | FileCheck %s .func: apas x0 + apas x1 + apas x2 + apas x17 + apas x30 mrs x3, GPCBW_EL3 msr GPCBW_EL3, x4 # CHECK: .func: -# CHECK-NEXT: apas x0 // encoding: [0x1f,0x70,0x0e,0xd5] +# CHECK-NEXT: apas x0 // encoding: [0x00,0x70,0x0e,0xd5] +# CHECK-NEXT: apas x1 // encoding: [0x01,0x70,0x0e,0xd5] +# CHECK-NEXT: apas x2 // encoding: [0x02,0x70,0x0e,0xd5] +# CHECK-NEXT: apas x17 // encoding: [0x11,0x70,0x0e,0xd5] +# CHECK-NEXT: apas x30 // encoding: [0x1e,0x70,0x0e,0xd5] # CHECK-NEXT: mrs x3, GPCBW_EL3 // encoding: [0xa3,0x21,0x3e,0xd5] # CHECK-NEXT: msr GPCBW_EL3, x4 // encoding: [0xa4,0x21,0x1e,0xd5] diff --git a/llvm/test/MC/Disassembler/AArch64/armv9.6a-rme-gpc3.txt b/llvm/test/MC/Disassembler/AArch64/armv9.6a-rme-gpc3.txt index c5d074bf0394f..d198771c341b9 100644 --- a/llvm/test/MC/Disassembler/AArch64/armv9.6a-rme-gpc3.txt +++ b/llvm/test/MC/Disassembler/AArch64/armv9.6a-rme-gpc3.txt @@ -1,10 +1,18 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mc -triple aarch64 -disassemble %s | FileCheck %s -[0x1f,0x70,0x0e,0xd5] +[0x00,0x70,0x0e,0xd5] +[0x01,0x70,0x0e,0xd5] +[0x02,0x70,0x0e,0xd5] +[0x11,0x70,0x0e,0xd5] +[0x1e,0x70,0x0e,0xd5] [0xa3,0x21,0x3e,0xd5] [0xa4,0x21,0x1e,0xd5] -# CHECK: sys #6, c7, c0, #0 +# CHECK: sys #6, c7, c0, #0, x0 +# CHECK-NEXT: sys #6, c7, c0, #0, x1 +# CHECK-NEXT: sys #6, c7, c0, #0, x2 +# CHECK-NEXT: sys #6, c7, c0, #0, x17 +# CHECK-NEXT: sys #6, c7, c0, #0, x30 # CHECK-NEXT: mrs x3, GPCBW_EL3 # CHECK-NEXT: msr GPCBW_EL3, x4 From b037bceef6a40c5c00c1f67cc5a334e2c4e5e041 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Wed, 8 Jan 2025 09:56:56 +0100 Subject: [PATCH 4/8] Add LLVM_GSL_POINTER to llvm::function_ref. (#120699) This can enable clang to detect dangling assignment issues, see #120698. --- llvm/include/llvm/ADT/STLFunctionalExtras.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ADT/STLFunctionalExtras.h b/llvm/include/llvm/ADT/STLFunctionalExtras.h index 3b9d40959d714..a4d50dc3648be 100644 --- a/llvm/include/llvm/ADT/STLFunctionalExtras.h +++ b/llvm/include/llvm/ADT/STLFunctionalExtras.h @@ -36,8 +36,8 @@ namespace llvm { /// a function_ref. 
template class function_ref; -template -class function_ref { +template +class LLVM_GSL_POINTER function_ref { Ret (*callback)(intptr_t callable, Params ...params) = nullptr; intptr_t callable; From 32bc029be6265838833623fdd88cc665d5658dc7 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 8 Jan 2025 08:59:15 +0000 Subject: [PATCH 5/8] [AArch64] Fix signed comparison warning. NFC --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 0640efde79d01..25b6731cb313a 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -4748,7 +4748,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost( LT.second.getVectorNumElements() / 2) { if (Index == 0) return 0; - if (Index == LT.second.getVectorNumElements() / 2) + if (Index == (int)LT.second.getVectorNumElements() / 2) return 1; } Kind = TTI::SK_PermuteSingleSrc; From 44e8ee73591bad22ae19748be825c4b66d7b3dde Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Wed, 8 Jan 2025 10:56:03 +0100 Subject: [PATCH 6/8] [flang][doc] refine zero initialization extension documentation (#121956) Following-up on [comments ](https://github.com/llvm/llvm-project/issues/62432#issuecomment-2555316522) in the issue that motivated this extension. --- flang/docs/Extensions.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md index 2d1c967a6068d..907f01204a387 100644 --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -160,7 +160,11 @@ end * `<>` as synonym for `.NE.` and `/=` * `$` and `@` as legal characters in names * Initialization in type declaration statements using `/values/` -* Saved variables without explicit or default initializers are zero initialized. +* Saved variables without explicit or default initializers are zero initialized, + except for scalar variables from the main program that are not explicitly + initialized or marked with an explicit SAVE attribute (these variables may be + placed on the stack by flang and not zero initialized). It is not advised to + rely on this extension in new code. * In a saved entity of a type with a default initializer, components without default values are zero initialized. * Kind specification with `*`, e.g. `REAL*4` From 303249c4490a7777a744d9afd449b64ff1132a42 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Wed, 8 Jan 2025 10:05:21 +0000 Subject: [PATCH 7/8] [flang][StackArrays] track pointers through fir.convert (#121919) This does add a little computational complexity because now every freemem operation has to be tested for every allocation. This could be improved with some more memoisation but I think it is easier to read this way. Let me know if you would prefer me to change this to pre-compute the normalised addresses each freemem operation is using. Weirdly, this change resulted in a verifier failure for the fir.declare in the previous test case. Maybe it was previously removed as dead code and now it isn't. Anyway I fixed that too. 
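
For reference, a rough sketch of the memoisation alternative mentioned above (illustrative only, not part of this patch; `normalisedFrees` and `func` are hypothetical names, `func` standing for the enclosing function operation, and the snippet assumes the `lookThroughDeclaresAndConverts` helper introduced below, run once per function rather than inside every allocmem rewrite):

  // Normalise each freemem operand up front, once per function.
  llvm::DenseMap<mlir::Operation *, mlir::Value> normalisedFrees;
  func->walk([&](fir::FreeMemOp freeOp) {
    normalisedFrees.try_emplace(
        freeOp.getOperation(),
        lookThroughDeclaresAndConverts(freeOp->getOperand(0)));
  });
  // Each allocmem rewrite would then only need map lookups rather than
  // re-walking the def chains for every (allocmem, freemem) pair.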
--- .../lib/Optimizer/Transforms/StackArrays.cpp | 37 +++++++++++-------- flang/test/Transforms/stack-arrays.fir | 20 +++++++++- 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp index bdcb8199b790d..2a9d3397e87b0 100644 --- a/flang/lib/Optimizer/Transforms/StackArrays.cpp +++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp @@ -330,6 +330,18 @@ std::optional LatticePoint::get(mlir::Value val) const { return it->second; } +static mlir::Value lookThroughDeclaresAndConverts(mlir::Value value) { + while (mlir::Operation *op = value.getDefiningOp()) { + if (auto declareOp = llvm::dyn_cast(op)) + value = declareOp.getMemref(); + else if (auto convertOp = llvm::dyn_cast(op)) + value = convertOp->getOperand(0); + else + return value; + } + return value; +} + mlir::LogicalResult AllocationAnalysis::visitOperation( mlir::Operation *op, const LatticePoint &before, LatticePoint *after) { LLVM_DEBUG(llvm::dbgs() << "StackArrays: Visiting operation: " << *op @@ -363,10 +375,10 @@ mlir::LogicalResult AllocationAnalysis::visitOperation( mlir::Value operand = op->getOperand(0); // Note: StackArrays is scheduled in the pass pipeline after lowering hlfir - // to fir. Therefore, we only need to handle `fir::DeclareOp`s. - if (auto declareOp = - llvm::dyn_cast_if_present(operand.getDefiningOp())) - operand = declareOp.getMemref(); + // to fir. Therefore, we only need to handle `fir::DeclareOp`s. Also look + // past converts in case the pointer was changed between different pointer + // types. + operand = lookThroughDeclaresAndConverts(operand); std::optional operandState = before.get(operand); if (operandState && *operandState == AllocationState::Allocated) { @@ -535,17 +547,12 @@ AllocMemConversion::matchAndRewrite(fir::AllocMemOp allocmem, // remove freemem operations llvm::SmallVector erases; - for (mlir::Operation *user : allocmem.getOperation()->getUsers()) { - if (auto declareOp = mlir::dyn_cast_if_present(user)) { - for (mlir::Operation *user : declareOp->getUsers()) { - if (mlir::isa(user)) - erases.push_back(user); - } - } - - if (mlir::isa(user)) - erases.push_back(user); - } + mlir::Operation *parent = allocmem->getParentOp(); + // TODO: this shouldn't need to be re-calculated for every allocmem + parent->walk([&](fir::FreeMemOp freeOp) { + if (lookThroughDeclaresAndConverts(freeOp->getOperand(0)) == allocmem) + erases.push_back(freeOp); + }); // now we are done iterating the users, it is safe to mutate them for (mlir::Operation *erase : erases) diff --git a/flang/test/Transforms/stack-arrays.fir b/flang/test/Transforms/stack-arrays.fir index 66cd2a5aa910b..444136d53e034 100644 --- a/flang/test/Transforms/stack-arrays.fir +++ b/flang/test/Transforms/stack-arrays.fir @@ -379,7 +379,8 @@ func.func @placement_loop_declare() { %3 = arith.addi %c1, %c2 : index // operand is now available %4 = fir.allocmem !fir.array, %3 - %5 = fir.declare %4 {uniq_name = "temp"} : (!fir.heap>) -> !fir.heap> + %shape = fir.shape %3 : (index) -> !fir.shape<1> + %5 = fir.declare %4(%shape) {uniq_name = "temp"} : (!fir.heap>, !fir.shape<1>) -> !fir.heap> // ... fir.freemem %5 : !fir.heap> fir.result %3, %c1_i32 : index, i32 @@ -400,3 +401,20 @@ func.func @placement_loop_declare() { // CHECK-NEXT: } // CHECK-NEXT: return // CHECK-NEXT: } + +// Can we look through fir.convert and fir.declare? 
+func.func @lookthrough() { + %0 = fir.allocmem !fir.array<42xi32> + %c42 = arith.constant 42 : index + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %1 = fir.declare %0(%shape) {uniq_name = "name"} : (!fir.heap>, !fir.shape<1>) -> !fir.heap> + %2 = fir.convert %1 : (!fir.heap>) -> !fir.ref> + // use the ref so the converts aren't folded + %3 = fir.load %2 : !fir.ref> + %4 = fir.convert %2 : (!fir.ref>) -> !fir.heap> + fir.freemem %4 : !fir.heap> + return +} +// CHECK: func.func @lookthrough() { +// CHECK: fir.alloca !fir.array<42xi32> +// CHECK-NOT: fir.freemem From 67efbd0bf1b2df8a479e09eb2be7db4c3c892f2c Mon Sep 17 00:00:00 2001 From: Ryan Mansfield Date: Wed, 8 Jan 2025 05:07:23 -0500 Subject: [PATCH 8/8] [LLVM] Fix various cl::desc typos and whitespace issues (NFC) (#121955) --- llvm/lib/Analysis/ScalarEvolution.cpp | 2 +- llvm/lib/CodeGen/CodeGenPrepare.cpp | 2 +- llvm/lib/CodeGen/MIRSampleProfile.cpp | 5 +++-- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 2 +- .../lib/CodeGen/MachineBranchProbabilityInfo.cpp | 2 +- llvm/lib/CodeGen/RegAllocGreedy.cpp | 2 +- llvm/lib/CodeGen/RegisterCoalescer.cpp | 2 +- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- .../CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 6 +++--- .../CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 6 +++--- llvm/lib/Passes/PassBuilderPipelines.cpp | 8 ++++---- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- .../Transforms/IPO/FunctionSpecialization.cpp | 14 +++++++------- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 2 +- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 2 +- llvm/lib/Transforms/IPO/SampleProfile.cpp | 11 ++++++----- .../Instrumentation/HWAddressSanitizer.cpp | 2 +- .../Instrumentation/LowerAllowCheckPass.cpp | 2 +- .../Instrumentation/PGOInstrumentation.cpp | 16 ++++++++-------- .../lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 2 +- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 2 +- .../lib/Transforms/Scalar/LoopVersioningLICM.cpp | 2 +- .../lib/Transforms/Utils/AssumeBundleBuilder.cpp | 4 ++-- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 7 ++++--- 24 files changed, 55 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index b5668a14a4a21..7e18f7c9c1ace 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -226,7 +226,7 @@ static cl::opt RangeIterThreshold( static cl::opt MaxLoopGuardCollectionDepth( "scalar-evolution-max-loop-guard-collection-depth", cl::Hidden, - cl::desc("Maximum depth for recrusive loop guard collection"), cl::init(1)); + cl::desc("Maximum depth for recursive loop guard collection"), cl::init(1)); static cl::opt ClassifyExpressions("scalar-evolution-classify-expressions", diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 5c712e4f007d3..ba1b10ec8b9b1 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -152,7 +152,7 @@ static cl::opt static cl::opt EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), - cl::desc("Enable sinkinig and/cmp into branches.")); + cl::desc("Enable sinking and/cmp into branches.")); static cl::opt DisableStoreExtract( "disable-cgp-store-extract", cl::Hidden, cl::init(false), diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp index 23db09b89599a..9bba50e8e6924 100644 --- a/llvm/lib/CodeGen/MIRSampleProfile.cpp +++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp @@ -46,8 +46,9 @@ static cl::opt ShowFSBranchProb( 
cl::desc("Print setting flow sensitive branch probabilities")); static cl::opt FSProfileDebugProbDiffThreshold( "fs-profile-debug-prob-diff-threshold", cl::init(10), - cl::desc("Only show debug message if the branch probility is greater than " - "this value (in percentage).")); + cl::desc( + "Only show debug message if the branch probability is greater than " + "this value (in percentage).")); static cl::opt FSProfileDebugBWThreshold( "fs-profile-debug-bw-threshold", cl::init(10000), diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 0f68313e64f54..05bc4cf646f42 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -149,7 +149,7 @@ static cl::opt JumpInstCost("jump-inst-cost", static cl::opt TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " - "Creates more fallthrough opportunites in " + "Creates more fallthrough opportunities in " "outline branches."), cl::init(true), cl::Hidden); diff --git a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 56ffffff62240..2e92dd8f257b4 100644 --- a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -29,7 +29,7 @@ namespace llvm { cl::opt StaticLikelyProb("static-likely-prob", cl::desc("branch probability threshold in percentage" - "to be considered very likely"), + " to be considered very likely"), cl::init(80), cl::Hidden); cl::opt ProfileLikelyProb( diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index e61dad5cf64d0..b94992c20b119 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -140,7 +140,7 @@ static cl::opt GreedyReverseLocalAssignment( static cl::opt SplitThresholdForRegWithHint( "split-threshold-for-reg-with-hint", cl::desc("The threshold for splitting a virtual register with a hint, in " - "percentate"), + "percentage"), cl::init(75), cl::Hidden); static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 7dac0deed7b7e..8313927dd2aa1 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -113,7 +113,7 @@ static cl::opt LargeIntervalSizeThreshold( static cl::opt LargeIntervalFreqThreshold( "large-interval-freq-threshold", cl::Hidden, - cl::desc("For a large interval, if it is coalesed with other live " + cl::desc("For a large interval, if it is coalesced with other live " "intervals many times more than the threshold, stop its " "coalescing to control the compile time. 
"), cl::init(256)); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6ea9e68a3a005..da3c834417d6b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -141,7 +141,7 @@ static cl::opt EnableReduceLoadOpStoreWidth( static cl::opt ReduceLoadOpStoreWidthForceNarrowingProfitable( "combiner-reduce-load-op-store-width-force-narrowing-profitable", cl::Hidden, cl::init(false), - cl::desc("DAG combiner force override the narrowing profitable check when" + cl::desc("DAG combiner force override the narrowing profitable check when " "reducing the width of load/op/store sequences")); static cl::opt EnableShrinkLoadReplaceStoreWithStore( diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 9e5867c70d7b6..51ee3cc681f05 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -125,9 +125,9 @@ static cl::opt MaxReorderWindow( cl::desc("Number of instructions to allow ahead of the critical path " "in sched=list-ilp")); -static cl::opt AvgIPC( - "sched-avg-ipc", cl::Hidden, cl::init(1), - cl::desc("Average inst/cycle whan no target itinerary exists.")); +static cl::opt + AvgIPC("sched-avg-ipc", cl::Hidden, cl::init(1), + cl::desc("Average inst/cycle when no target itinerary exists.")); namespace { diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 26fc75c0578ec..dff7243b0a99c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -43,9 +43,9 @@ STATISTIC(LoadsClustered, "Number of loads clustered together"); // without a target itinerary. The choice of number here has more to do with // balancing scheduler heuristics than with the actual machine latency. 
static cl::opt HighLatencyCycles( - "sched-high-latency-cycles", cl::Hidden, cl::init(10), - cl::desc("Roughly estimate the number of cycles that 'long latency'" - "instructions take for targets with no itinerary")); + "sched-high-latency-cycles", cl::Hidden, cl::init(10), + cl::desc("Roughly estimate the number of cycles that 'long latency' " + "instructions take for targets with no itinerary")); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) : ScheduleDAG(mf), InstrItins(mf.getSubtarget().getInstrItineraryData()) {} diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index d737ea5ab070a..4ec0fb8fc81ea 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -189,9 +189,9 @@ static cl::opt EnableGlobalAnalyses( "enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses")); -static cl::opt - RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, - cl::desc("Run Partial inlinining pass")); +static cl::opt RunPartialInlining("enable-partial-inlining", + cl::init(false), cl::Hidden, + cl::desc("Run Partial inlining pass")); static cl::opt ExtraVectorizerPasses( "extra-vectorizer-passes", cl::init(false), cl::Hidden, @@ -264,7 +264,7 @@ static cl::opt static cl::opt FlattenedProfileUsed( "flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " - "no inline hierachy exists in the profile")); + "no inline hierarchy exists in the profile")); static cl::opt EnableOrderFileInstrumentation( "enable-order-file-instrumentation", cl::init(false), cl::Hidden, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a6bfb489faff2..6b0eb38e7e095 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -94,7 +94,7 @@ static cl::opt BrMergingCcmpBias( static cl::opt WidenShift("x86-widen-shift", cl::init(true), - cl::desc("Replacte narrow shifts with wider shifts."), + cl::desc("Replace narrow shifts with wider shifts."), cl::Hidden); static cl::opt BrMergingLikelyBias( diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 96956481df2f6..449d64d1614ff 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -66,19 +66,19 @@ static cl::opt MaxCodeSizeGrowth( "Maximum codesize growth allowed per function")); static cl::opt MinCodeSizeSavings( - "funcspec-min-codesize-savings", cl::init(20), cl::Hidden, cl::desc( - "Reject specializations whose codesize savings are less than this" - "much percent of the original function size")); + "funcspec-min-codesize-savings", cl::init(20), cl::Hidden, + cl::desc("Reject specializations whose codesize savings are less than this " + "much percent of the original function size")); static cl::opt MinLatencySavings( "funcspec-min-latency-savings", cl::init(40), cl::Hidden, - cl::desc("Reject specializations whose latency savings are less than this" + cl::desc("Reject specializations whose latency savings are less than this " "much percent of the original function size")); static cl::opt MinInliningBonus( - "funcspec-min-inlining-bonus", cl::init(300), cl::Hidden, cl::desc( - "Reject specializations whose inlining bonus is less than this" - "much percent of the original function size")); + "funcspec-min-inlining-bonus", 
cl::init(300), cl::Hidden, + cl::desc("Reject specializations whose inlining bonus is less than this " + "much percent of the original function size")); static cl::opt SpecializeOnAddress( "funcspec-on-address", cl::init(false), cl::Hidden, cl::desc( diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 16a80e9ebbeaa..78cd249c9c16a 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -105,7 +105,7 @@ static cl::opt ColdCCRelFreq( "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::desc( "Maximum block frequency, expressed as a percentage of caller's " - "entry frequency, for a call site to be considered cold for enabling" + "entry frequency, for a call site to be considered cold for enabling " "coldcc")); /// Is this global variable possibly used by a leak checker as a root? If so, diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index b40ab357670b8..67585e9c80ef4 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -129,7 +129,7 @@ static cl::opt PrintModuleBeforeOptimizations( static cl::opt AlwaysInlineDeviceFunctions( "openmp-opt-inline-device", - cl::desc("Inline all applicible functions on the device."), cl::Hidden, + cl::desc("Inline all applicable functions on the device."), cl::Hidden, cl::init(false)); static cl::opt diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 603beb3b883d7..b978c54ef96fd 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -162,7 +162,7 @@ static cl::opt ProfileSampleBlockAccurate( static cl::opt ProfileAccurateForSymsInList( "profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " - "be accurate. It may be overriden by profile-sample-accurate. ")); + "be accurate. It may be overridden by profile-sample-accurate. ")); static cl::opt ProfileMergeInlinee( "sample-profile-merge-inlinee", cl::Hidden, cl::init(true), @@ -193,9 +193,10 @@ static cl::opt ProfileSizeInline( // and inline the hot functions (that are skipped in this pass). static cl::opt DisableSampleLoaderInlining( "disable-sample-loader-inlining", cl::Hidden, cl::init(false), - cl::desc("If true, artifically skip inline transformation in sample-loader " - "pass, and merge (or scale) profiles (as configured by " - "--sample-profile-merge-inlinee).")); + cl::desc( + "If true, artificially skip inline transformation in sample-loader " + "pass, and merge (or scale) profiles (as configured by " + "--sample-profile-merge-inlinee).")); namespace llvm { cl::opt @@ -255,7 +256,7 @@ static cl::opt PrecentMismatchForStalenessError( static cl::opt CallsitePrioritizedInline( "sample-profile-prioritized-inline", cl::Hidden, - cl::desc("Use call site prioritized inlining for sample profile loader." + cl::desc("Use call site prioritized inlining for sample profile loader. 
" "Currently only CSSPGO is supported.")); static cl::opt UsePreInlinerDecision( diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 530061e3b6bb7..2031728c2f33d 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -192,7 +192,7 @@ static cl::opt cl::Hidden); static cl::opt ClHotPercentileCutoff("hwasan-percentile-cutoff-hot", - cl::desc("Hot percentile cuttoff.")); + cl::desc("Hot percentile cutoff.")); static cl::opt ClRandomSkipRate("hwasan-random-rate", diff --git a/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp b/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp index 2418030dd601a..f27798cfd228c 100644 --- a/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp +++ b/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp @@ -30,7 +30,7 @@ using namespace llvm; static cl::opt HotPercentileCutoff("lower-allow-check-percentile-cutoff-hot", - cl::desc("Hot percentile cuttoff.")); + cl::desc("Hot percentile cutoff.")); static cl::opt RandomRate("lower-allow-check-random-rate", diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 471086ce3a751..db4d62ec36751 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -158,11 +158,11 @@ STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed"); // Command line option to specify the file to read profile from. This is // mainly used for testing. -static cl::opt - PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, - cl::value_desc("filename"), - cl::desc("Specify the path of profile data file. This is" - "mainly for test purpose.")); +static cl::opt PGOTestProfileFile( + "pgo-test-profile-file", cl::init(""), cl::Hidden, + cl::value_desc("filename"), + cl::desc("Specify the path of profile data file. This is " + "mainly for test purpose.")); static cl::opt PGOTestProfileRemappingFile( "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), @@ -186,7 +186,7 @@ static cl::opt MaxNumAnnotations( // to write to the metadata for a single memop intrinsic. static cl::opt MaxNumMemOPAnnotations( "memop-max-annotations", cl::init(4), cl::Hidden, - cl::desc("Max number of preicise value annotations for a single memop" + cl::desc("Max number of precise value annotations for a single memop" "intrinsic")); // Command line option to control appending FunctionHash to the name of a COMDAT @@ -291,13 +291,13 @@ static cl::opt PGOVerifyHotBFI( cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. 
" "The print is enabled under -Rpass-analysis=pgo, or " - "internal option -pass-remakrs-analysis=pgo.")); + "internal option -pass-remarks-analysis=pgo.")); static cl::opt PGOVerifyBFI( "pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " - "internal option -pass-remakrs-analysis=pgo.")); + "internal option -pass-remarks-analysis=pgo.")); static cl::opt PGOVerifyBFIRatio( "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index ba1c2241aea9a..3c82eeda54838 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -128,7 +128,7 @@ static cl::opt static cl::opt UseLIRCodeSizeHeurs( "use-lir-code-size-heurs", - cl::desc("Use loop idiom recognition code size heuristics when compiling" + cl::desc("Use loop idiom recognition code size heuristics when compiling " "with -Os/-Oz"), cl::init(true), cl::Hidden); diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 260cc72c3188d..090348809e571 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -104,7 +104,7 @@ static cl::opt UnrollMaxPercentThresholdBoost( static cl::opt UnrollMaxIterationsCountToAnalyze( "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, - cl::desc("Don't allow loop unrolling to simulate more than this number of" + cl::desc("Don't allow loop unrolling to simulate more than this number of " "iterations when checking full unroll profitability")); static cl::opt UnrollCount( diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp index f58dcb51f64fe..6e91c4fa6e230 100644 --- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -95,7 +95,7 @@ static const char *LICMVersioningMetaData = "llvm.loop.licm_versioning.disable"; /// invariant instructions in a loop. static cl::opt LVInvarThreshold("licm-versioning-invariant-threshold", - cl::desc("LoopVersioningLICM's minimum allowed percentage" + cl::desc("LoopVersioningLICM's minimum allowed percentage " "of possible invariant instructions per loop"), cl::init(25), cl::Hidden); diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp index 1d4f5618b39d0..b499ef839729e 100644 --- a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp +++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp @@ -28,8 +28,8 @@ using namespace llvm; namespace llvm { cl::opt ShouldPreserveAllAttributes( "assume-preserve-all", cl::init(false), cl::Hidden, - cl::desc("enable preservation of all attrbitues. even those that are " - "unlikely to be usefull")); + cl::desc("enable preservation of all attributes. 
even those that are " + "unlikely to be useful")); cl::opt EnableKnowledgeRetention( "enable-knowledge-retention", cl::init(false), cl::Hidden, diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 03dc6c1d17446..e367b01a09090 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -96,8 +96,9 @@ using namespace PatternMatch; cl::opt llvm::RequireAndPreserveDomTree( "simplifycfg-require-and-preserve-domtree", cl::Hidden, - cl::desc("Temorary development switch used to gradually uplift SimplifyCFG " - "into preserving DomTree,")); + cl::desc( + "Temporary development switch used to gradually uplift SimplifyCFG " + "into preserving DomTree,")); // Chosen as 2 so as to be cheap, but still to have enough power to fold // a select, so the "clamp" idiom (of a min followed by a max) will be caught. @@ -126,7 +127,7 @@ static cl::opt HoistLoadsStoresWithCondFaulting( static cl::opt HoistLoadsStoresWithCondFaultingThreshold( "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), - cl::desc("Control the maximal conditonal load/store that we are willing " + cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"));