Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing 4 changed files with 501 additions and 18 deletions.
There are no files selected for viewing
239 changes: 239 additions & 0 deletions
239
packages/l/llvm/files/amdgpu-skip-debug-instrs-in-vgpr.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,239 @@ | ||
From 3dc413e25d6ccff9a9b7a3beffafef52be83e43c Mon Sep 17 00:00:00 2001 | ||
From: Yashwant Singh <Yashwant.Singh@amd.com> | ||
Date: Mon, 7 Aug 2023 11:35:25 +0530 | ||
Subject: [PATCH] [AMDGPU] Skip debug instruction uses while optimizing live | ||
range of a reg in SIOptimizeVGPRLiveRange | ||
|
||
This will prevent the `assert(!O.readsReg())` from firing in | ||
SIOptimizeVGPRLiveRange::optimizeLiveRange | ||
|
||
Fix for #64163 | ||
|
||
Reviewed By: arsenm, #amdgpu | ||
|
||
Differential Revision: https://reviews.llvm.org/D156893 | ||
--- | ||
.../Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp | 6 +- | ||
.../si-optimize-vgpr-live-range-dbg-instr.ll | 76 ++++++++++++ | ||
.../si-optimize-vgpr-live-range-dbg-instr.mir | 111 ++++++++++++++++++ | ||
3 files changed, 191 insertions(+), 2 deletions(-) | ||
create mode 100644 llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll | ||
create mode 100644 llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir | ||
|
||
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp | ||
index e95abae88d7a83d..8204a70e72d916e 100644 | ||
--- a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp | ||
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp | ||
@@ -522,9 +522,11 @@ void SIOptimizeVGPRLiveRange::optimizeLiveRange( | ||
auto *UseBlock = UseMI->getParent(); | ||
// Replace uses in Endif block | ||
if (UseBlock == Endif) { | ||
- if (UseMI->isPHI()) { | ||
+ if (UseMI->isPHI()) | ||
O.setReg(NewReg); | ||
- } else { | ||
+ else if (UseMI->isDebugInstr()) | ||
+ continue; | ||
+ else { | ||
// DetectDeadLanes may mark register uses as undef without removing | ||
// them, in which case a non-phi instruction using the original register | ||
// may exist in the Endif block even though the register is not live | ||
diff --git a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll | ||
new file mode 100644 | ||
index 000000000000000..d34769ad0fcf0a6 | ||
--- /dev/null | ||
+++ b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll | ||
@@ -0,0 +1,76 @@ | ||
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 | ||
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefix=GCN %s | ||
+ | ||
+declare void @llvm.dbg.value(metadata, metadata, metadata) #0 | ||
+ | ||
+define void @__omp_offloading_35_36570d3__ZN6openmc31process_advance_particle_eventsEv_l252_debug___omp_outlined_debug___omp_outlined(i1 %arg) { | ||
+; GCN-LABEL: __omp_offloading_35_36570d3__ZN6openmc31process_advance_particle_eventsEv_l252_debug___omp_outlined_debug___omp_outlined: | ||
+; GCN: .Lfunc_begin0: | ||
+; GCN-NEXT: .cfi_sections .debug_frame | ||
+; GCN-NEXT: .cfi_startproc | ||
+; GCN-NEXT: ; %bb.0: ; %bb | ||
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
+; GCN-NEXT: v_mov_b32_e32 v1, 0 | ||
+; GCN-NEXT: v_mov_b32_e32 v2, 0 | ||
+; GCN-NEXT: global_load_dwordx2 v[1:2], v[1:2], off | ||
+; GCN-NEXT: v_and_b32_e32 v0, 1, v0 | ||
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 | ||
+; GCN-NEXT: s_xor_b64 s[4:5], vcc, -1 | ||
+; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] | ||
+; GCN-NEXT: s_xor_b64 s[4:5], exec, s[6:7] | ||
+; GCN-NEXT: s_cbranch_execnz .LBB0_3 | ||
+; GCN-NEXT: ; %bb.1: ; %Flow | ||
+; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] | ||
+; GCN-NEXT: s_cbranch_execnz .LBB0_4 | ||
+; GCN-NEXT: .LBB0_2: ; %bb3 | ||
+; GCN-NEXT: s_or_b64 exec, exec, s[4:5] | ||
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | ||
+; GCN-NEXT: s_setpc_b64 s[30:31] | ||
+; GCN-NEXT: .LBB0_3: ; %bb2 | ||
+; GCN-NEXT: v_mov_b32_e32 v3, 0 | ||
+; GCN-NEXT: v_mov_b32_e32 v4, v3 | ||
+; GCN-NEXT: s_waitcnt vmcnt(0) | ||
+; GCN-NEXT: flat_store_dwordx2 v[1:2], v[3:4] | ||
+; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 | ||
+; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] | ||
+; GCN-NEXT: s_cbranch_execz .LBB0_2 | ||
+; GCN-NEXT: .LBB0_4: ; %bb1 | ||
+; GCN-NEXT: v_mov_b32_e32 v3, 0 | ||
+; GCN-NEXT: v_mov_b32_e32 v4, v3 | ||
+; GCN-NEXT: s_waitcnt vmcnt(0) | ||
+; GCN-NEXT: flat_store_dwordx2 v[1:2], v[3:4] | ||
+; GCN-NEXT: s_or_b64 exec, exec, s[4:5] | ||
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | ||
+; GCN-NEXT: s_setpc_b64 s[30:31] | ||
+bb: | ||
+ %i = load ptr, ptr addrspace(1) null, align 8 | ||
+ br i1 %arg, label %bb1, label %bb2 | ||
+ | ||
+bb1: ; preds = %bb | ||
+ store double 0.000000e+00, ptr %i, align 8 | ||
+ br label %bb3 | ||
+ | ||
+bb2: ; preds = %bb | ||
+ store double 0.000000e+00, ptr %i, align 8 | ||
+ br label %bb3 | ||
+ | ||
+bb3: ; preds = %bb2, %bb1 | ||
+ call void @llvm.dbg.value(metadata !DIArgList(ptr %i, i64 0), metadata !4, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 2712, DW_OP_mul, DW_OP_plus, DW_OP_plus_uconst, 2680, DW_OP_stack_value)), !dbg !9 | ||
+ ret void | ||
+} | ||
+ | ||
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } | ||
+ | ||
+!llvm.dbg.cu = !{!0} | ||
+!llvm.module.flags = !{!3} | ||
+ | ||
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.0 (trunk 131941)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2) | ||
+!1 = !DIFile(filename: "/d/j/debug-test.c", directory: "/Volumes/Data/b") | ||
+!2 = !{} | ||
+!3 = !{i32 1, !"Debug Info Version", i32 3} | ||
+!4 = !DILocalVariable(name: "c", scope: !5, file: !1, line: 2, type: !8) | ||
+!5 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 1, type: !6, scopeLine: 1, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) | ||
+!6 = !DISubroutineType(types: !7) | ||
+!7 = !{!8} | ||
+!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) | ||
+!9 = !DILocation(line: 0, scope: !5) | ||
diff --git a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir | ||
new file mode 100644 | ||
index 000000000000000..3bdcc14936fb9bf | ||
--- /dev/null | ||
+++ b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir | ||
@@ -0,0 +1,111 @@ | ||
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -run-pass=si-opt-vgpr-liverange %s -o - | FileCheck -check-prefix=GCN %s | ||
+ | ||
+# SIOptimizeVGPRLiveRange shouldn't try to modify use of %5 in DBG_VALUE_LIST | ||
+ | ||
+--- | | ||
+ define void @dbg_instr_use(i1 %arg) #1 { | ||
+ ret void | ||
+ } | ||
+ | ||
+ attributes #1 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx908" "uniform-work-group-size"="false" } | ||
+ !llvm.dbg.cu = !{!0} | ||
+ !llvm.module.flags = !{!3} | ||
+ | ||
+ !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.0 (trunk 131941)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2) | ||
+ !1 = !DIFile(filename: "/d/j/debug-test.c", directory: "/Volumes/Data/b") | ||
+ !2 = !{} | ||
+ !3 = !{i32 1, !"Debug Info Version", i32 3} | ||
+ !4 = !DILocalVariable(name: "c", scope: !5, file: !1, line: 2, type: !8) | ||
+ !5 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 1, type: !6, scopeLine: 1, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) | ||
+ !6 = !DISubroutineType(types: !7) | ||
+ !7 = !{!8} | ||
+ !8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) | ||
+ !9 = !DILocation(line: 0, scope: !5) | ||
+... | ||
+ | ||
+--- | ||
+name: dbg_instr_use | ||
+tracksRegLiveness: true | ||
+body: | | ||
+ ; GCN-LABEL: name: dbg_instr_use | ||
+ ; GCN: bb.0: | ||
+ ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) | ||
+ ; GCN-NEXT: liveins: $vgpr0 | ||
+ ; GCN-NEXT: {{ $}} | ||
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 | ||
+ ; GCN-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, killed [[COPY]], implicit $exec | ||
+ ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 1, killed [[V_AND_B32_e32_]], implicit $exec | ||
+ ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 killed [[V_CMP_EQ_U32_e64_]], -1, implicit-def dead $scc | ||
+ ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec | ||
+ ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[V_MOV_B]], 0, 0, implicit $exec | ||
+ ; GCN-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[S_XOR_B64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | ||
+ ; GCN-NEXT: S_BRANCH %bb.3 | ||
+ ; GCN-NEXT: {{ $}} | ||
+ ; GCN-NEXT: bb.1: | ||
+ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) | ||
+ ; GCN-NEXT: {{ $}} | ||
+ ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64 = PHI [[GLOBAL_LOAD_DWORDX2_]], %bb.0, undef %13:vreg_64, %bb.3 | ||
+ ; GCN-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_64 = SI_ELSE killed [[SI_IF]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | ||
+ ; GCN-NEXT: S_BRANCH %bb.2 | ||
+ ; GCN-NEXT: {{ $}} | ||
+ ; GCN-NEXT: bb.2: | ||
+ ; GCN-NEXT: successors: %bb.4(0x80000000) | ||
+ ; GCN-NEXT: {{ $}} | ||
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec | ||
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 | ||
+ ; GCN-NEXT: FLAT_STORE_DWORDX2 killed [[PHI]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr | ||
+ ; GCN-NEXT: S_BRANCH %bb.4 | ||
+ ; GCN-NEXT: {{ $}} | ||
+ ; GCN-NEXT: bb.3: | ||
+ ; GCN-NEXT: successors: %bb.1(0x80000000) | ||
+ ; GCN-NEXT: {{ $}} | ||
+ ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec | ||
+ ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_MOV_B32_e32_1]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 | ||
+ ; GCN-NEXT: FLAT_STORE_DWORDX2 killed [[GLOBAL_LOAD_DWORDX2_]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr | ||
+ ; GCN-NEXT: S_BRANCH %bb.1 | ||
+ ; GCN-NEXT: {{ $}} | ||
+ ; GCN-NEXT: bb.4: | ||
+ ; GCN-NEXT: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec | ||
+ ; GCN-NEXT: DBG_VALUE_LIST | ||
+ ; GCN-NEXT-SAME: %9 | ||
+ ; GCN-NEXT: SI_RETURN | ||
+ bb.0: | ||
+ successors: %bb.3(0x40000000), %bb.1(0x40000000) | ||
+ liveins: $vgpr0 | ||
+ | ||
+ %0:vgpr_32 = COPY $vgpr0 | ||
+ %1:vgpr_32 = V_AND_B32_e32 1, %0, implicit $exec | ||
+ %2:sreg_64 = V_CMP_EQ_U32_e64 1, killed %1, implicit $exec | ||
+ %3:sreg_64 = S_XOR_B64 killed %2, -1, implicit-def dead $scc | ||
+ %4:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec | ||
+ %5:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %4, 0, 0, implicit $exec | ||
+ %6:sreg_64 = SI_IF killed %3, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | ||
+ S_BRANCH %bb.3 | ||
+ | ||
+ bb.1: | ||
+ successors: %bb.2(0x40000000), %bb.4(0x40000000) | ||
+ | ||
+ %7:sreg_64 = SI_ELSE %6, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | ||
+ S_BRANCH %bb.2 | ||
+ | ||
+ bb.2: | ||
+ successors: %bb.4(0x80000000) | ||
+ | ||
+ %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec | ||
+ %9:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %8, %subreg.sub1 | ||
+ FLAT_STORE_DWORDX2 %5, killed %9, 0, 0, implicit $exec, implicit $flat_scr | ||
+ S_BRANCH %bb.4 | ||
+ | ||
+ bb.3: | ||
+ successors: %bb.1(0x80000000) | ||
+ | ||
+ %10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec | ||
+ %11:vreg_64 = REG_SEQUENCE %10, %subreg.sub0, %10, %subreg.sub1 | ||
+ FLAT_STORE_DWORDX2 %5, killed %11, 0, 0, implicit $exec, implicit $flat_scr | ||
+ S_BRANCH %bb.1 | ||
+ | ||
+ bb.4: | ||
+ SI_END_CF %7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | ||
+ DBG_VALUE_LIST !4, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 2712, DW_OP_mul, DW_OP_plus, DW_OP_plus_uconst, 2680, DW_OP_stack_value), %5, 0, debug-location !9 | ||
+ SI_RETURN | ||
+... |
Oops, something went wrong.