Skip to content

Commit

Permalink
[Statepoints] Update DAG root after emitting statepoint.
Browse files Browse the repository at this point in the history
Since we always generate CopyToRegs for statepoint results,
we must update DAG root after emitting statepoint, so that
these copies are scheduled before any possible local uses.
Note: getControlRoot() flushes all PendingExports, not only
those we generate for relocates. If that becomes a problem,
we can change it to flush relocate exports only.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D87251
  • Loading branch information
dantrushin committed Sep 9, 2020
1 parent b29bdab commit 4358fa7
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 3 deletions.
7 changes: 4 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
Expand Up @@ -841,7 +841,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
Register Reg = FuncInfo.CreateRegs(RetTy);
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), Reg, RetTy, None);
SDValue Chain = DAG.getEntryNode();
SDValue Chain = DAG.getRoot();
RFV.getCopyToRegs(Relocated, DAG, getCurSDLoc(), Chain, nullptr);
PendingExports.push_back(Chain);

Expand Down Expand Up @@ -919,8 +919,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
// Remove original call node
DAG.DeleteNode(CallNode);

// DON'T set the root - under the assumption that it's already set past the
// inserted node we created.
// Since we always emit CopyToRegs (even for local relocates), we must
// update root, so that they are emitted before any local uses.
(void)getControlRoot();

// TODO: A better future implementation would be to emit a single variable
// argument, variable return value STATEPOINT node here and then hookup the
Expand Down
88 changes: 88 additions & 0 deletions llvm/test/CodeGen/X86/statepoint-vreg.ll
Expand Up @@ -8,8 +8,12 @@ declare i1 @return_i1()
declare void @func()
declare void @"some_call"(i64 addrspace(1)*)
declare void @consume(i32 addrspace(1)*)
declare i32 @consume1(i32) gc "statepoint-example"
declare void @consume2(i32 addrspace(1)*, i32 addrspace(1)*)
declare void @consume3(float) gc "statepoint-example"
declare float @consume4(i64) gc "statepoint-example"
declare void @consume5(i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*)

declare void @use1(i32 addrspace(1)*, i8 addrspace(1)*)
declare i32 @"personality_function"()

Expand Down Expand Up @@ -590,6 +594,90 @@ entry:
ret void
}

; test multiple statepoints/relocates within single block.
; relocates must be properly scheduled w.r.t. statepoints
define void @test_sched(float %0, i32 %1, i8 addrspace(1)* %2) gc "statepoint-example" {
; CHECK-LABEL: test_sched:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset %rbx, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: movl %edi, %ebp
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: callq consume3
; CHECK-NEXT: .Ltmp25:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2sd %ebp, %xmm0
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: nopl 8(%rax,%rax)
; CHECK-NEXT: .Ltmp26:
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss %xmm0, (%rsp)
; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: nopl 8(%rax,%rax)
; CHECK-NEXT: .Ltmp27:
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss %xmm0, (%rsp)
; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: nopl 8(%rax,%rax)
; CHECK-NEXT: .Ltmp28:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: xorpd %xmm0, %xmm0
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: ucomisd %xmm0, %xmm1
; CHECK-NEXT: movabsq $9223372036854775807, %rdi # imm = 0x7FFFFFFFFFFFFFFF
; CHECK-NEXT: cmovbeq %rax, %rdi
; CHECK-NEXT: movsd %xmm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss %xmm0, (%rsp)
; CHECK-NEXT: nopl 8(%rax,%rax)
; CHECK-NEXT: .Ltmp29:
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
  ; The trailing "(a, b)" notes on the relocates below name the (base, derived)
  ; values selected by the two index operands from the statepoint's "gc-live"
  ; bundle.

  ; Statepoint on @consume3(%0) with %2 as the only "gc-live" pointer;
  ; %reloc1 is %2 relocated across it.
  %token0 = call token (i64, i32, void (float)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf32f(i64 2, i32 0, void (float)* nonnull @consume3, i32 1, i32 0, float %0, i32 0, i32 0) [ "gc-live"(i8 addrspace(1)* %2) ]
  %reloc1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token0, i32 0, i32 0) ; (%2, %2)
  ; Non-GC computation sitting between statepoints: %tmp1 is a "deopt" operand
  ; of the later statepoints and %to_max.i29 feeds the select that forms %tmp5.
  %tmp1 = sitofp i32 %1 to double
  %to_max.i29 = fcmp ogt double %tmp1, 0.000000e+00
  ; Statepoint on @consume1 with %reloc1 as the only live pointer; it is
  ; relocated twice with identical indices (%reloc2 and %reloc3).
  %token1 = call token (i64, i32, i32 (i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32i32f(i64 2, i32 5, i32 (i32)* nonnull @consume1, i32 1, i32 0, i32 undef, i32 0, i32 0) [ "gc-live"(i8 addrspace(1)* %reloc1) ]
  %reloc2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token1, i32 0, i32 0) ; (%reloc1, %reloc1)
  %reloc3 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token1, i32 0, i32 0) ; (%reloc1, %reloc1)
  ; Both relocates of the previous statepoint are live across this one.
  %token2 = call token (i64, i32, i32 (i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32i32f(i64 2, i32 5, i32 (i32)* nonnull @consume1, i32 1, i32 0, i32 undef, i32 0, i32 0) [ "deopt"(float %0, double %tmp1), "gc-live"(i8 addrspace(1)* %reloc2, i8 addrspace(1)* %reloc3) ]
  %reloc4 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token2, i32 0, i32 0) ; (%reloc2, %reloc2)
  %reloc5 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token2, i32 1, i32 1) ; (%reloc3, %reloc3)
  ; Again both prior relocates are live; the single relocate below uses
  ; different base and derived indices.
  %token3 = call token (i64, i32, void (float)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf32f(i64 2, i32 5, void (float)* nonnull @consume3, i32 1, i32 0, float %0, i32 0, i32 0) [ "deopt"(float %0, double %tmp1), "gc-live"(i8 addrspace(1)* %reloc4, i8 addrspace(1)* %reloc5) ]
  %reloc6 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token3, i32 1, i32 0) ; (%reloc5, %reloc4)
  %tmp5 = select i1 %to_max.i29, i64 9223372036854775807, i64 0
  ; The final statepoint has an empty "gc-live" bundle, so no relocates follow it.
  %token4 = call token (i64, i32, float (i64)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_f32i64f(i64 2, i32 5, float (i64)* nonnull @consume4, i32 1, i32 0, i64 %tmp5, i32 0, i32 0) [ "deopt"(float %0, double %tmp1), "gc-live"() ]
ret void
}

declare token @llvm.experimental.gc.statepoint.p0f_f32i64f(i64 immarg, i32 immarg, float (i64)*, i32 immarg, i32 immarg, ...)
declare token @llvm.experimental.gc.statepoint.p0f_i32i32f(i64 immarg, i32 immarg, i32 (i32)*, i32 immarg, i32 immarg, ...)
declare token @llvm.experimental.gc.statepoint.p0f_isVoidf32f(i64 immarg, i32 immarg, void (float)*, i32 immarg, i32 immarg, ...)
declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)
Expand Down

0 comments on commit 4358fa7

Please sign in to comment.