From 8b05b475470073808a2db2bc79b9695ccb0e70a6 Mon Sep 17 00:00:00 2001
From: Gavin Zhao <git@gzgz.dev>
Date: Sun, 5 Nov 2023 17:15:17 -0500
Subject: [PATCH] llvm: Backport more AMDGPU patches

Signed-off-by: Gavin Zhao <git@gzgz.dev>
---
 .../amdgpu-skip-debug-instrs-in-vgpr.patch    | 239 +++++++++++++++++
 .../files/fix-amdgpu-vgpr-live-range.patch    | 242 ++++++++++++++++++
 packages/l/llvm/package.yml                   |   4 +-
 packages/l/llvm/pspec_x86_64.xml              |  34 +--
 4 files changed, 501 insertions(+), 18 deletions(-)
 create mode 100644 packages/l/llvm/files/amdgpu-skip-debug-instrs-in-vgpr.patch
 create mode 100644 packages/l/llvm/files/fix-amdgpu-vgpr-live-range.patch

diff --git a/packages/l/llvm/files/amdgpu-skip-debug-instrs-in-vgpr.patch b/packages/l/llvm/files/amdgpu-skip-debug-instrs-in-vgpr.patch
new file mode 100644
index 00000000000..3dfd900e7b7
--- /dev/null
+++ b/packages/l/llvm/files/amdgpu-skip-debug-instrs-in-vgpr.patch
@@ -0,0 +1,239 @@
+From 3dc413e25d6ccff9a9b7a3beffafef52be83e43c Mon Sep 17 00:00:00 2001
+From: Yashwant Singh <Yashwant.Singh@amd.com>
+Date: Mon, 7 Aug 2023 11:35:25 +0530
+Subject: [PATCH] [AMDGPU] Skip debug instruction uses while optimizing live
+ range of a reg in SIOptimizeVGPRLiveRange
+
+This will prevent the `assert(!O.readsReg())` from firing in
+SIOptimizeVGPRLiveRange::optimizeLiveRange
+
+Fix for #64163
+
+Reviewed By: arsenm, #amdgpu
+
+Differential Revision: https://reviews.llvm.org/D156893
+---
+ .../Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp |   6 +-
+ .../si-optimize-vgpr-live-range-dbg-instr.ll  |  76 ++++++++++++
+ .../si-optimize-vgpr-live-range-dbg-instr.mir | 111 ++++++++++++++++++
+ 3 files changed, 191 insertions(+), 2 deletions(-)
+ create mode 100644 llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll
+ create mode 100644 llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir
+
+diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
+index e95abae88d7a83d..8204a70e72d916e 100644
+--- a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
++++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
+@@ -522,9 +522,11 @@ void SIOptimizeVGPRLiveRange::optimizeLiveRange(
+     auto *UseBlock = UseMI->getParent();
+     // Replace uses in Endif block
+     if (UseBlock == Endif) {
+-      if (UseMI->isPHI()) {
++      if (UseMI->isPHI())
+         O.setReg(NewReg);
+-      } else {
++      else if (UseMI->isDebugInstr())
++        continue;
++      else {
+         // DetectDeadLanes may mark register uses as undef without removing
+         // them, in which case a non-phi instruction using the original register
+         // may exist in the Endif block even though the register is not live
+diff --git a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll
+new file mode 100644
+index 000000000000000..d34769ad0fcf0a6
+--- /dev/null
++++ b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll
+@@ -0,0 +1,76 @@
++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
++; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefix=GCN %s
++
++declare void @llvm.dbg.value(metadata, metadata, metadata) #0
++
++define void @__omp_offloading_35_36570d3__ZN6openmc31process_advance_particle_eventsEv_l252_debug___omp_outlined_debug___omp_outlined(i1 %arg) {
++; GCN-LABEL: __omp_offloading_35_36570d3__ZN6openmc31process_advance_particle_eventsEv_l252_debug___omp_outlined_debug___omp_outlined:
++; GCN:       .Lfunc_begin0:
++; GCN-NEXT:    .cfi_sections .debug_frame
++; GCN-NEXT:    .cfi_startproc
++; GCN-NEXT:  ; %bb.0: ; %bb
++; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
++; GCN-NEXT:    v_mov_b32_e32 v1, 0
++; GCN-NEXT:    v_mov_b32_e32 v2, 0
++; GCN-NEXT:    global_load_dwordx2 v[1:2], v[1:2], off
++; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
++; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
++; GCN-NEXT:    s_xor_b64 s[4:5], vcc, -1
++; GCN-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
++; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
++; GCN-NEXT:    s_cbranch_execnz .LBB0_3
++; GCN-NEXT:  ; %bb.1: ; %Flow
++; GCN-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
++; GCN-NEXT:    s_cbranch_execnz .LBB0_4
++; GCN-NEXT:  .LBB0_2: ; %bb3
++; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
++; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
++; GCN-NEXT:    s_setpc_b64 s[30:31]
++; GCN-NEXT:  .LBB0_3: ; %bb2
++; GCN-NEXT:    v_mov_b32_e32 v3, 0
++; GCN-NEXT:    v_mov_b32_e32 v4, v3
++; GCN-NEXT:    s_waitcnt vmcnt(0)
++; GCN-NEXT:    flat_store_dwordx2 v[1:2], v[3:4]
++; GCN-NEXT:    ; implicit-def: $vgpr1_vgpr2
++; GCN-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
++; GCN-NEXT:    s_cbranch_execz .LBB0_2
++; GCN-NEXT:  .LBB0_4: ; %bb1
++; GCN-NEXT:    v_mov_b32_e32 v3, 0
++; GCN-NEXT:    v_mov_b32_e32 v4, v3
++; GCN-NEXT:    s_waitcnt vmcnt(0)
++; GCN-NEXT:    flat_store_dwordx2 v[1:2], v[3:4]
++; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
++; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
++; GCN-NEXT:    s_setpc_b64 s[30:31]
++bb:
++  %i = load ptr, ptr addrspace(1) null, align 8
++  br i1 %arg, label %bb1, label %bb2
++
++bb1:                                              ; preds = %bb
++  store double 0.000000e+00, ptr %i, align 8
++  br label %bb3
++
++bb2:                                              ; preds = %bb
++  store double 0.000000e+00, ptr %i, align 8
++  br label %bb3
++
++bb3:                                              ; preds = %bb2, %bb1
++  call void @llvm.dbg.value(metadata !DIArgList(ptr %i, i64 0), metadata !4, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 2712, DW_OP_mul, DW_OP_plus, DW_OP_plus_uconst, 2680, DW_OP_stack_value)), !dbg !9
++  ret void
++}
++
++attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
++
++!llvm.dbg.cu = !{!0}
++!llvm.module.flags = !{!3}
++
++!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.0 (trunk 131941)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2)
++!1 = !DIFile(filename: "/d/j/debug-test.c", directory: "/Volumes/Data/b")
++!2 = !{}
++!3 = !{i32 1, !"Debug Info Version", i32 3}
++!4 = !DILocalVariable(name: "c", scope: !5, file: !1, line: 2, type: !8)
++!5 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 1, type: !6, scopeLine: 1, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
++!6 = !DISubroutineType(types: !7)
++!7 = !{!8}
++!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
++!9 = !DILocation(line: 0, scope: !5)
+diff --git a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir
+new file mode 100644
+index 000000000000000..3bdcc14936fb9bf
+--- /dev/null
++++ b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir
+@@ -0,0 +1,111 @@
++# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -run-pass=si-opt-vgpr-liverange  %s -o - | FileCheck -check-prefix=GCN %s
++
++# SIOptimizeVGPRLiveRange shouldn't try to modify use of %5 in DBG_VALUE_LIST
++
++--- |
++  define void @dbg_instr_use(i1 %arg) #1 {
++    ret void
++  }
++
++  attributes #1 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx908" "uniform-work-group-size"="false" }
++  !llvm.dbg.cu = !{!0}
++  !llvm.module.flags = !{!3}
++
++  !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.0 (trunk 131941)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2)
++  !1 = !DIFile(filename: "/d/j/debug-test.c", directory: "/Volumes/Data/b")
++  !2 = !{}
++  !3 = !{i32 1, !"Debug Info Version", i32 3}
++  !4 = !DILocalVariable(name: "c", scope: !5, file: !1, line: 2, type: !8)
++  !5 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 1, type: !6, scopeLine: 1, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
++  !6 = !DISubroutineType(types: !7)
++  !7 = !{!8}
++  !8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
++  !9 = !DILocation(line: 0, scope: !5)
++...
++
++---
++name:            dbg_instr_use
++tracksRegLiveness: true
++body:             |
++  ; GCN-LABEL: name: dbg_instr_use
++  ; GCN: bb.0:
++  ; GCN-NEXT:   successors: %bb.3(0x40000000), %bb.1(0x40000000)
++  ; GCN-NEXT:   liveins: $vgpr0
++  ; GCN-NEXT: {{  $}}
++  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
++  ; GCN-NEXT:   [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, killed [[COPY]], implicit $exec
++  ; GCN-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 1, killed [[V_AND_B32_e32_]], implicit $exec
++  ; GCN-NEXT:   [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 killed [[V_CMP_EQ_U32_e64_]], -1, implicit-def dead $scc
++  ; GCN-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
++  ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[V_MOV_B]], 0, 0, implicit $exec
++  ; GCN-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[S_XOR_B64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
++  ; GCN-NEXT:   S_BRANCH %bb.3
++  ; GCN-NEXT: {{  $}}
++  ; GCN-NEXT: bb.1:
++  ; GCN-NEXT:   successors: %bb.2(0x40000000), %bb.4(0x40000000)
++  ; GCN-NEXT: {{  $}}
++  ; GCN-NEXT:   [[PHI:%[0-9]+]]:vreg_64 = PHI [[GLOBAL_LOAD_DWORDX2_]], %bb.0, undef %13:vreg_64, %bb.3
++  ; GCN-NEXT:   [[SI_ELSE:%[0-9]+]]:sreg_64 = SI_ELSE killed [[SI_IF]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
++  ; GCN-NEXT:   S_BRANCH %bb.2
++  ; GCN-NEXT: {{  $}}
++  ; GCN-NEXT: bb.2:
++  ; GCN-NEXT:   successors: %bb.4(0x80000000)
++  ; GCN-NEXT: {{  $}}
++  ; GCN-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
++  ; GCN-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
++  ; GCN-NEXT:   FLAT_STORE_DWORDX2 killed [[PHI]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr
++  ; GCN-NEXT:   S_BRANCH %bb.4
++  ; GCN-NEXT: {{  $}}
++  ; GCN-NEXT: bb.3:
++  ; GCN-NEXT:   successors: %bb.1(0x80000000)
++  ; GCN-NEXT: {{  $}}
++  ; GCN-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
++  ; GCN-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_MOV_B32_e32_1]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
++  ; GCN-NEXT:   FLAT_STORE_DWORDX2 killed [[GLOBAL_LOAD_DWORDX2_]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr
++  ; GCN-NEXT:   S_BRANCH %bb.1
++  ; GCN-NEXT: {{  $}}
++  ; GCN-NEXT: bb.4:
++  ; GCN-NEXT:   SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
++  ; GCN-NEXT:   DBG_VALUE_LIST
++  ; GCN-NEXT-SAME: %9
++  ; GCN-NEXT:   SI_RETURN
++  bb.0:
++    successors: %bb.3(0x40000000), %bb.1(0x40000000)
++    liveins: $vgpr0
++  
++    %0:vgpr_32 = COPY $vgpr0
++    %1:vgpr_32 = V_AND_B32_e32 1, %0, implicit $exec
++    %2:sreg_64 = V_CMP_EQ_U32_e64 1, killed %1, implicit $exec
++    %3:sreg_64 = S_XOR_B64 killed %2, -1, implicit-def dead $scc
++    %4:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
++    %5:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %4, 0, 0, implicit $exec
++    %6:sreg_64 = SI_IF killed %3, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
++    S_BRANCH %bb.3
++  
++  bb.1:
++    successors: %bb.2(0x40000000), %bb.4(0x40000000)
++  
++    %7:sreg_64 = SI_ELSE %6, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
++    S_BRANCH %bb.2
++  
++  bb.2:
++    successors: %bb.4(0x80000000)
++  
++    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
++    %9:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %8, %subreg.sub1
++    FLAT_STORE_DWORDX2 %5, killed %9, 0, 0, implicit $exec, implicit $flat_scr
++    S_BRANCH %bb.4
++  
++  bb.3:
++    successors: %bb.1(0x80000000)
++  
++    %10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
++    %11:vreg_64 = REG_SEQUENCE %10, %subreg.sub0, %10, %subreg.sub1
++    FLAT_STORE_DWORDX2 %5, killed %11, 0, 0, implicit $exec, implicit $flat_scr
++    S_BRANCH %bb.1
++  
++  bb.4:
++    SI_END_CF %7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
++    DBG_VALUE_LIST !4, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 2712, DW_OP_mul, DW_OP_plus, DW_OP_plus_uconst, 2680, DW_OP_stack_value), %5, 0, debug-location !9
++    SI_RETURN
++...
diff --git a/packages/l/llvm/files/fix-amdgpu-vgpr-live-range.patch b/packages/l/llvm/files/fix-amdgpu-vgpr-live-range.patch
new file mode 100644
index 00000000000..13d8bd43f1d
--- /dev/null
+++ b/packages/l/llvm/files/fix-amdgpu-vgpr-live-range.patch
@@ -0,0 +1,242 @@
+From 1e63f8272e8ad2b41f9c4d58250d7de264c4aa4b Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nicolai.haehnle@amd.com>
+Date: Wed, 26 Apr 2023 15:18:51 +0200
+Subject: [PATCH] AMDGPU: Fix an assertion in SIOptimizeVGPRLiveRange
+
+As the comment notes, the shader results in an INSERT_SUBREG with
+"undef" (dead) operand in the Endif block. The same can happen with
+REG_SEQUENCE. The register is considered dead from a liveness
+analysis perspective. The correct thing to do seems to be nothing:
+we keep the undef use of the register, the register allocator should
+still be able to take the liveness into account correctly.
+
+Differential Revision: https://reviews.llvm.org/D149161
+---
+ .../Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp | 11 ++-
+ llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll     | 90 +++++++++++++++++
+ .../si-opt-vgpr-liverange-bug-deadlanes.mir   | 97 +++++++++++++++++++
+ 3 files changed, 196 insertions(+), 2 deletions(-)
+ create mode 100644 llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll
+ create mode 100644 llvm/test/CodeGen/AMDGPU/si-opt-vgpr-liverange-bug-deadlanes.mir
+
+diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
+index ae2c10116de8591..126f56100c64a36 100644
+--- a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
++++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
+@@ -522,8 +522,15 @@ void SIOptimizeVGPRLiveRange::optimizeLiveRange(
+     auto *UseBlock = UseMI->getParent();
+     // Replace uses in Endif block
+     if (UseBlock == Endif) {
+-      assert(UseMI->isPHI() && "Uses should be PHI in Endif block");
+-      O.setReg(NewReg);
++      if (UseMI->isPHI()) {
++        O.setReg(NewReg);
++      } else {
++        // DetectDeadLanes may mark register uses as undef without removing
++        // them, in which case a non-phi instruction using the original register
++        // may exist in the Endif block even though the register is not live
++        // into it.
++        assert(!O.readsReg());
++      }
+       continue;
+     }
+ 
+diff --git a/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll b/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll
+new file mode 100644
+index 000000000000000..a9cbcf073af612c
+--- /dev/null
++++ b/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll
+@@ -0,0 +1,90 @@
++; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-codegenprepare-break-large-phis=false < %s | FileCheck %s
++
++; CHECK-LABEL: {{^}}_amdgpu_ps_main:
++;
++; This test case used to hit an assertion in SIOptimizeVGPRLiveRange because of a vector with dead lanes
++; leading to an effectively dead INSERT_SUBREG.
++
++
++define dllexport amdgpu_ps void @_amdgpu_ps_main(i32 %descTable2) #0 {
++.entry:
++  %i2 = zext i32 %descTable2 to i64
++  %i4 = inttoptr i64 %i2 to ptr addrspace(4)
++  %i159 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float poison, float poison, float poison, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
++  %i1540 = shufflevector <4 x float> %i159, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
++  %i1526 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 2688, i32 0)
++  %i1746 = load <4 x i32>, ptr addrspace(4) %i4, align 16
++  br label %bb1750
++
++bb1750:                                           ; preds = %bb1897, %.entry
++  %__llpc_global_proxy_r3.12.vec.extract2358295 = phi i32 [ 0, %.entry ], [ %__llpc_global_proxy_r3.12.vec.extract2358, %bb1897 ]
++  %__llpc_global_proxy_r13.20293 = phi <4 x i32> [ undef, %.entry ], [ %__llpc_global_proxy_r13.22, %bb1897 ]
++  %__llpc_global_proxy_r10.19291 = phi <4 x i32> [ poison, %.entry ], [ %i1914, %bb1897 ]
++  %i1751 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i1746, i32 poison, i32 0, i32 0, i32 0)
++  %i1754 = shufflevector <4 x i32> %__llpc_global_proxy_r10.19291, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
++  %__llpc_global_proxy_r7.12.vec.extract1953260 = bitcast float %i1751 to i32
++  %i1760 = or i32 %__llpc_global_proxy_r7.12.vec.extract1953260, %__llpc_global_proxy_r3.12.vec.extract2358295
++  %i1786 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 %i1760, i32 0)
++  %__llpc_global_proxy_r11.12.vec.insert1237 = insertelement <4 x i32> %i1754, i32 %i1786, i64 3
++  %.not2783 = icmp eq i32 %i1786, 0
++  br i1 %.not2783, label %bb1789, label %bb1787
++
++bb1787:                                           ; preds = %bb1750
++  %i1788 = shufflevector <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, <4 x i32> %__llpc_global_proxy_r13.20293, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
++  br label %bb1897
++
++bb1789:                                           ; preds = %bb1750
++  %i1796 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float poison, float 0.000000e+00, float 1.000000e+00)
++  %i1797 = fmul reassoc nnan nsz arcp contract afn float %i1796, %i1796
++  %i1800 = fdiv reassoc nnan nsz arcp contract afn float %i1797, 0.000000e+00
++  %i1801 = bitcast float %i1800 to i32
++  %__llpc_global_proxy_r11.12.vec.insert1245 = insertelement <4 x i32> %__llpc_global_proxy_r11.12.vec.insert1237, i32 poison, i64 3
++  %i1818 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float poison, float 0.000000e+00, float 1.000000e+00)
++  %i1819 = bitcast float %i1818 to i32
++  %i1878 = shufflevector <4 x i32> %__llpc_global_proxy_r11.12.vec.insert1245, <4 x i32> poison, <3 x i32> <i32 3, i32 3, i32 3>
++  %i1879 = bitcast <3 x i32> %i1878 to <3 x float>
++  %i1881 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1540, %i1879
++  %i1882 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> poison, i32 poison, i32 0)
++  %i1883 = shufflevector <3 x i32> %i1882, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
++  %i1884 = bitcast <4 x i32> %i1883 to <4 x float>
++  %i1885 = shufflevector <4 x float> %i1884, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
++  %i1886 = insertelement <3 x i32> undef, i32 %i1819, i64 0
++  %i1887 = bitcast <3 x i32> %i1886 to <3 x float>
++  %i1888 = insertelement <3 x i32> undef, i32 %i1801, i64 0
++  %i1889 = bitcast <3 x i32> %i1888 to <3 x float>
++  %i1890 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1887, %i1889
++  %i1891 = shufflevector <3 x float> %i1890, <3 x float> poison, <3 x i32> zeroinitializer
++  %i1892 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1885, %i1891
++  %i1893 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1892, %i1881
++  %i1894 = bitcast <3 x float> %i1893 to <3 x i32>
++  %i1895 = shufflevector <3 x i32> %i1894, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
++  %i1896 = insertelement <4 x i32> %i1895, i32 %i1819, i64 3
++  br label %bb1897
++
++bb1897:                                           ; preds = %bb1789, %bb1787
++  %__llpc_global_proxy_r11.19 = phi <4 x i32> [ %__llpc_global_proxy_r11.12.vec.insert1237, %bb1787 ], [ %__llpc_global_proxy_r11.12.vec.insert1245, %bb1789 ]
++  %__llpc_global_proxy_r13.22 = phi <4 x i32> [ %i1788, %bb1787 ], [ %i1896, %bb1789 ]
++  %i1898 = shufflevector <4 x i32> %__llpc_global_proxy_r11.19, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
++  %i1899 = bitcast <3 x i32> %i1898 to <3 x float>
++  %i1900 = shufflevector <4 x i32> %__llpc_global_proxy_r13.22, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
++  %i1901 = bitcast <3 x i32> %i1900 to <3 x float>
++  %i1902 = fadd reassoc nnan nsz arcp contract afn <3 x float> %i1901, %i1899
++  %i1903 = bitcast <3 x float> %i1902 to <3 x i32>
++  %i1907 = shufflevector <3 x i32> %i1903, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
++  %i1908 = shufflevector <4 x i32> %i1907, <4 x i32> %__llpc_global_proxy_r11.19, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
++  %i1914 = shufflevector <4 x i32> %i1908, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
++  %__llpc_global_proxy_r3.12.vec.extract2358 = extractelement <2 x i32> zeroinitializer, i64 1
++  %.not2780.not = icmp ult i32 %__llpc_global_proxy_r3.12.vec.extract2358, %i1526
++  br i1 %.not2780.not, label %bb1750, label %._crit_edge298
++
++._crit_edge298:                                   ; preds = %bb1897
++  ret void
++}
++
++declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg)
++declare float @llvm.amdgcn.fmed3.f32(float, float, float)
++declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg)
++declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
++declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg)
++
++attributes #0 = { "target-features"=",+wavefrontsize64,+cumode" }
+diff --git a/llvm/test/CodeGen/AMDGPU/si-opt-vgpr-liverange-bug-deadlanes.mir b/llvm/test/CodeGen/AMDGPU/si-opt-vgpr-liverange-bug-deadlanes.mir
+new file mode 100644
+index 000000000000000..71c408ced015b78
+--- /dev/null
++++ b/llvm/test/CodeGen/AMDGPU/si-opt-vgpr-liverange-bug-deadlanes.mir
+@@ -0,0 +1,97 @@
++# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
++# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=si-opt-vgpr-liverange -o - %s | FileCheck -check-prefixes=CHECK %s
++
++# Tests a case that used to assert in SIOptimizeVGPRLiveRange when trying to optimize %3 which still appears
++# (though in an undef operand) in the REG_SEQUENCE of the "endif block". This undef pattern was caused by
++# DetectDeadLanes.
++
++--- |
++  define dllexport amdgpu_ps void @_amdgpu_ps_main() #0 {
++    unreachable
++  }
++
++  attributes #0 = { "target-cpu"="gfx1100" "target-features"=",+wavefrontsize64,+cumode" "uniform-work-group-size"="false" }
++...
++---
++name:            _amdgpu_ps_main
++tracksRegLiveness: true
++registers:
++  - { id: 1, class: vgpr_32, preferred-register: '' }
++  - { id: 2, class: vgpr_32, preferred-register: '' }
++  - { id: 3, class: vgpr_32, preferred-register: '' }
++  - { id: 4, class: sreg_64, preferred-register: '$vcc' }
++  - { id: 5, class: vreg_128, preferred-register: '' }
++  - { id: 6, class: sgpr_128, preferred-register: '' }
++  - { id: 7, class: vgpr_32, preferred-register: '' }
++liveins:
++  - { reg: '$vgpr0', virtual-reg: '%1' }
++body:             |
++  ; CHECK-LABEL: name: _amdgpu_ps_main
++  ; CHECK: bb.0:
++  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
++  ; CHECK-NEXT:   liveins: $vgpr0
++  ; CHECK-NEXT: {{  $}}
++  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
++  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
++  ; CHECK-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN killed [[COPY]], undef %5:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
++  ; CHECK-NEXT:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[BUFFER_LOAD_DWORD_OFFEN]], implicit $exec
++  ; CHECK-NEXT:   [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_NE_U32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
++  ; CHECK-NEXT:   S_BRANCH %bb.1
++  ; CHECK-NEXT: {{  $}}
++  ; CHECK-NEXT: bb.1:
++  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
++  ; CHECK-NEXT: {{  $}}
++  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, undef %4.sub3, %subreg.sub3
++  ; CHECK-NEXT: {{  $}}
++  ; CHECK-NEXT: bb.2:
++  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
++  ; CHECK-NEXT: {{  $}}
++  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:vreg_128 = PHI undef %10:vreg_128, %bb.0, [[REG_SEQUENCE]], %bb.1
++  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[BUFFER_LOAD_DWORD_OFFEN]], %bb.0, undef %15:vgpr_32, %bb.1
++  ; CHECK-NEXT:   [[SI_ELSE:%[0-9]+]]:sreg_64 = SI_ELSE killed [[SI_IF]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
++  ; CHECK-NEXT:   S_BRANCH %bb.3
++  ; CHECK-NEXT: {{  $}}
++  ; CHECK-NEXT: bb.3:
++  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
++  ; CHECK-NEXT: {{  $}}
++  ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PHI1]], %subreg.sub0, [[PHI1]], %subreg.sub1, [[PHI1]], %subreg.sub2, undef %6:vgpr_32, %subreg.sub3
++  ; CHECK-NEXT: {{  $}}
++  ; CHECK-NEXT: bb.4:
++  ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:vreg_128 = PHI [[PHI]], %bb.2, [[REG_SEQUENCE1]], %bb.3
++  ; CHECK-NEXT:   SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
++  ; CHECK-NEXT:   dead [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PHI2]].sub2, %subreg.sub0, [[PHI2]].sub2, %subreg.sub1, [[PHI2]].sub2, %subreg.sub2, undef [[BUFFER_LOAD_DWORD_OFFEN]], %subreg.sub3
++  ; CHECK-NEXT:   S_ENDPGM 0
++  bb.0:
++    successors: %bb.5(0x40000000), %bb.6(0x40000000)
++    liveins: $vgpr0
++
++    %1:vgpr_32 = COPY killed $vgpr0
++    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
++    %3:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %1, undef %6:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
++    %4:sreg_64 = V_CMP_NE_U32_e64 0, %3, implicit $exec
++    %8:sreg_64 = SI_IF killed %4, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
++    S_BRANCH %bb.5
++
++  bb.5:
++    successors: %bb.6(0x80000000)
++
++    %9:vreg_128 = REG_SEQUENCE %2, %subreg.sub0, %2, %subreg.sub1, %2, %subreg.sub2, undef %5.sub3:vreg_128, %subreg.sub3
++
++  bb.6:
++    successors: %bb.7(0x40000000), %bb.8(0x40000000)
++
++    %10:vreg_128 = PHI undef %156:vreg_128, %bb.0, %9, %bb.5
++    %11:sreg_64 = SI_ELSE killed %8, %bb.8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
++    S_BRANCH %bb.7
++
++  bb.7:
++    successors: %bb.8(0x80000000)
++
++    %12:vreg_128 = REG_SEQUENCE %3, %subreg.sub0, %3, %subreg.sub1, killed %3, %subreg.sub2, undef %7, %subreg.sub3
++
++  bb.8:
++    %13:vreg_128 = PHI %10, %bb.6, %12, %bb.7
++    SI_END_CF killed %11, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
++    %5:vreg_128 = REG_SEQUENCE %13.sub2, %subreg.sub0, %13.sub2, %subreg.sub1, killed %13.sub2, %subreg.sub2, undef %3, %subreg.sub3
++    S_ENDPGM 0
++...
diff --git a/packages/l/llvm/package.yml b/packages/l/llvm/package.yml
index cfe35e5b9da..194f48b3df8 100644
--- a/packages/l/llvm/package.yml
+++ b/packages/l/llvm/package.yml
@@ -1,6 +1,6 @@
 name       : llvm
 version    : 16.0.6
-release    : 112
+release    : 113
 source     :
     - https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.6/llvm-project-16.0.6.src.tar.xz : ce5e71081d17ce9e86d7cbcfa28c4b04b9300f8fb7e78422b1feb6bc52c3028e
 homepage   : http://llvm.org/
@@ -193,6 +193,8 @@ setup      : |
 
     # AMDGPU related
     %patch -p1 -i $pkgfiles/fix-amdgpu-si-fold-operands-assertion-error.patch
+    %patch -p1 -i $pkgfiles/fix-amdgpu-vgpr-live-range.patch
+    %patch -p1 -i $pkgfiles/amdgpu-skip-debug-instrs-in-vgpr.patch
 
     pushd clang
         # Patch cfe for Solus specific options
diff --git a/packages/l/llvm/pspec_x86_64.xml b/packages/l/llvm/pspec_x86_64.xml
index 49b1df744e6..440da71ed03 100644
--- a/packages/l/llvm/pspec_x86_64.xml
+++ b/packages/l/llvm/pspec_x86_64.xml
@@ -3,8 +3,8 @@
         <Name>llvm</Name>
         <Homepage>http://llvm.org/</Homepage>
         <Packager>
-            <Name>Joey Riches</Name>
-            <Email>josephriches@gmail.com</Email>
+            <Name>Gavin Zhao</Name>
+            <Email>git@gzgz.dev</Email>
         </Packager>
         <License>Apache-2.0-with-LLVM-exception</License>
         <PartOf>programming</PartOf>
@@ -220,7 +220,7 @@
 </Description>
         <PartOf>emul32</PartOf>
         <RuntimeDependencies>
-            <Dependency release="112">llvm</Dependency>
+            <Dependency release="113">llvm</Dependency>
         </RuntimeDependencies>
         <Files>
             <Path fileType="library">/usr/lib32/LLVMgold.so</Path>
@@ -365,8 +365,8 @@
 </Description>
         <PartOf>programming.devel</PartOf>
         <RuntimeDependencies>
-            <Dependency release="112">llvm-devel</Dependency>
-            <Dependency release="112">llvm-32bit</Dependency>
+            <Dependency release="113">llvm-devel</Dependency>
+            <Dependency release="113">llvm-32bit</Dependency>
         </RuntimeDependencies>
         <Files>
             <Path fileType="header">/usr/include/llvm/Config/llvm-config-32.h</Path>
@@ -420,8 +420,8 @@
 </Description>
         <PartOf>programming</PartOf>
         <RuntimeDependencies>
-            <Dependency release="112">llvm</Dependency>
-            <Dependency releaseFrom="112">llvm-devel</Dependency>
+            <Dependency release="113">llvm</Dependency>
+            <Dependency releaseFrom="113">llvm-devel</Dependency>
         </RuntimeDependencies>
         <Files>
             <Path fileType="executable">/usr/bin/UnicodeNameMappingGenerator</Path>
@@ -850,7 +850,7 @@
 </Description>
         <PartOf>emul32</PartOf>
         <RuntimeDependencies>
-            <Dependency release="112">llvm-32bit</Dependency>
+            <Dependency release="113">llvm-32bit</Dependency>
         </RuntimeDependencies>
         <Files>
             <Path fileType="library">/usr/lib32/libclang-cpp.so</Path>
@@ -870,8 +870,8 @@
 </Description>
         <PartOf>programming.devel</PartOf>
         <RuntimeDependencies>
-            <Dependency releaseFrom="112">llvm-clang-32bit</Dependency>
-            <Dependency releaseFrom="112">llvm-clang-devel</Dependency>
+            <Dependency releaseFrom="113">llvm-clang-32bit</Dependency>
+            <Dependency releaseFrom="113">llvm-clang-devel</Dependency>
         </RuntimeDependencies>
         <Files>
             <Path fileType="library">/usr/lib32/clang/16/include/__clang_cuda_builtin_vars.h</Path>
@@ -1166,7 +1166,7 @@
 </Description>
         <PartOf>programming.devel</PartOf>
         <RuntimeDependencies>
-            <Dependency releaseFrom="112">llvm-clang</Dependency>
+            <Dependency releaseFrom="113">llvm-clang</Dependency>
         </RuntimeDependencies>
         <Files>
             <Path fileType="header">/usr/include/clang-c/BuildSystem.h</Path>
@@ -2348,7 +2348,7 @@
 </Description>
         <PartOf>programming.devel</PartOf>
         <RuntimeDependencies>
-            <Dependency release="112">llvm</Dependency>
+            <Dependency release="113">llvm</Dependency>
         </RuntimeDependencies>
         <Files>
             <Path fileType="executable">/usr/bin/FileCheck</Path>
@@ -6953,12 +6953,12 @@
         </Files>
     </Package>
     <History>
-        <Update release="112">
-            <Date>2023-10-30</Date>
+        <Update release="113">
+            <Date>2023-11-05</Date>
             <Version>16.0.6</Version>
             <Comment>Packaging update</Comment>
-            <Name>Joey Riches</Name>
-            <Email>josephriches@gmail.com</Email>
+            <Name>Gavin Zhao</Name>
+            <Email>git@gzgz.dev</Email>
         </Update>
     </History>
-</PISI>
\ No newline at end of file
+</PISI>