Skip to content

[JIT] Range check elimination #9380

Description

@omariom

I don't think there is a perf impact; it is just an interesting case that may point to something important.

The difference is in these lines:

mov      dword ptr [rcx+rax+16], r8d
lea      rax, bword ptr [rcx+rax+16]
;vs
lea      rax, bword ptr [rcx+rax+16]
mov      dword ptr [rax], r8d
// Variant 1: stores `value` into four fields of entries[index], spelling out
// the entries[index] element access in every statement.
// NoInlining: presumably so the method is compiled on its own and its JIT
// listing can be inspected in isolation.
// NOTE(review): Entry is not shown here; the 16-byte element stride and the
// 0/4/8/12 store offsets in the listing suggest a struct of four ints - confirm.
[MethodImpl(MethodImplOptions.NoInlining)]
private static void TestRangeCheckElimination(Entry[] entries, int index, int value)
{
    // Four separate source-level array accesses with the same array and index.
    entries[index].hashCode = value;
    entries[index].next     = value;
    entries[index].key      = value;
    entries[index].value    = value;
}

// Variant 2: performs the same four field stores, but indexes the array only
// once and keeps the element as a ref local.
// NoInlining: presumably so the method is compiled on its own and its JIT
// listing can be inspected in isolation.
[MethodImpl(MethodImplOptions.NoInlining)]
private static void TestRangeCheckEliminationWithRef(Entry[] entries, int index, int value)
{
    // The only array access in the method; every store below goes through this ref.
    ref Entry entry = ref entries[index];

    entry.hashCode = value;
    entry.next     = value;
    entry.key      = value;
    entry.value    = value;
}
Assembly for TestRangeCheckElimination

; Assembly listing for method TestDotNetCore.Program:TestRangeCheckElimination(ref,int,int)
; Emitting BLENDED_CODE for X64 CPU with AVX
; optimized code
; rsp based frame
; partially interruptible
; Final local variable assignments
;
;  V00 arg0         [V00,T00] ( 15, 15   )     ref  ->  rcx         class-hnd
;  V01 arg1         [V01,T03] ( 12, 12   )     int  ->  rdx        
;  V02 arg2         [V02,T01] ( 14, 14   )     int  ->   r8        
;  V03 OutArgs      [V03    ] (  1,  1   )  lclBlk (32) [rsp+0x00]  
;  V04 cse0         [V04,T02] ( 14, 14   )   byref  ->  rax        
;  V05 cse1         [V05,T04] ( 10, 10   )    long  ->  rax        
;  V06 cse2         [V06,T05] (  5,  5   )     int  ->  rax        
;
; Lcl frame size = 40

; Register roles (from the local variable table of this listing):
;   rcx = arg0 (entries), edx = arg1 (index), r8d = arg2 (value), rax = CSE temps.
; Only one cmp/jae pair appears in this body although the C# source indexes the
; array four times.
G_M10648_IG01:
       4883EC28             sub      rsp, 40                      ; reserve the 40-byte local frame

G_M10648_IG02:
       8B4108               mov      eax, dword ptr [rcx+8]       ; eax = dword at entries+8 (presumably the array length)
       3BD0                 cmp      edx, eax                     ; index vs. that value
       7322                 jae      SHORT G_M10648_IG04          ; unsigned >= : go to the range-check failure path
       4863C2               movsxd   rax, edx                     ; rax = sign-extended index
       48C1E004             shl      rax, 4                       ; rax = index * 16 (element byte offset)
       4489440110           mov      dword ptr [rcx+rax+16], r8d  ; first store uses the full base+offset+16 address...
       488D440110           lea      rax, bword ptr [rcx+rax+16]  ; ...and the same address is then materialized in rax
       44894004             mov      dword ptr [rax+4], r8d       ; remaining stores go through rax at +4, +8, +12
       44894008             mov      dword ptr [rax+8], r8d
       4489400C             mov      dword ptr [rax+12], r8d

G_M10648_IG03:
       4883C428             add      rsp, 40                      ; release the local frame
       C3                   ret      

; Range-check failure path, reached only from the jae above.
G_M10648_IG04:
       E86E50305F           call     CORINFO_HELP_RNGCHKFAIL
       CC                   int3                                  ; presumably unreachable: the helper is not expected to return

; Total bytes of code 51, prolog size 4 for method 

Assembly for TestRangeCheckEliminationWithRef

; Assembly listing for method TestDotNetCore.Program:TestRangeCheckEliminationWithRef(ref,int,int)
; Emitting BLENDED_CODE for X64 CPU with AVX
; optimized code
; rsp based frame
; partially interruptible
; Final local variable assignments
;
;  V00 arg0         [V00,T02] (  4,  4   )     ref  ->  rcx         class-hnd
;  V01 arg1         [V01,T03] (  4,  4   )     int  ->  rdx        
;  V02 arg2         [V02,T01] (  6,  6   )     int  ->   r8        
;  V03 tmp0         [V03,T00] (  5, 10   )   byref  ->  rax        
;  V04 OutArgs      [V04    ] (  1,  1   )  lclBlk (32) [rsp+0x00]  
;
; Lcl frame size = 40

; Register roles (from the local variable table of this listing):
;   rcx = arg0 (entries), edx = arg1 (index), r8d = arg2 (value), rax = tmp0 (byref).
G_M60939_IG01:
       4883EC28             sub      rsp, 40                      ; reserve the 40-byte local frame

G_M60939_IG02:
       3B5108               cmp      edx, dword ptr [rcx+8]       ; index vs. dword at entries+8 (presumably the array length), compared straight from memory
       7320                 jae      SHORT G_M60939_IG04          ; unsigned >= : go to the range-check failure path
       4863C2               movsxd   rax, edx                     ; rax = sign-extended index
       48C1E004             shl      rax, 4                       ; rax = index * 16 (element byte offset)
       488D440110           lea      rax, bword ptr [rcx+rax+16]  ; rax = element address, computed before any store
       448900               mov      dword ptr [rax], r8d         ; all four stores go through rax at +0, +4, +8, +12
       44894004             mov      dword ptr [rax+4], r8d
       44894008             mov      dword ptr [rax+8], r8d
       4489400C             mov      dword ptr [rax+12], r8d

G_M60939_IG03:
       4883C428             add      rsp, 40                      ; release the local frame
       C3                   ret      

; Range-check failure path, reached only from the jae above.
G_M60939_IG04:
       E82250305F           call     CORINFO_HELP_RNGCHKFAIL
       CC                   int3                                  ; presumably unreachable: the helper is not expected to return

; Total bytes of code 47, prolog size 4 for method

cc @mikedn

category:cq
theme:basic-cq
skill-level:expert
cost:medium

Metadata

Metadata

Assignees

No one assigned

    Labels

    JitUntriaged (CLR JIT issues needing additional triage); area-CodeGen-coreclr (CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI); enhancement (Product code improvement that does NOT require public API changes/additions); optimization; tenet-performance (Performance related issue)

    Type

    No type

    Fields

    No fields configured for issues without a type.

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions