Skip to content

JIT: simple benchmark highlights some CQ issues #10114

Description

@AndyAyersMS

Here are some simple methods where codegen could be improved. Original from aspnet/SignalR#1900.

    // Returns true only if all three nullable arguments have a value.
    // The `in` modifier passes each bool? by read-only reference.
    // NoInlining keeps the call out of line (presumably so the method gets its own JIT listing).
    [MethodImpl(MethodImplOptions.NoInlining)]
    private bool InMethod(in bool? value1, in bool? value2, in bool? value3)
    {
        return value1.HasValue && value2.HasValue && value3.HasValue;
    }
    
    // Returns true only if all three nullable arguments have a value.
    // Each bool? is passed by value here (no `in` modifier).
    // NoInlining keeps the call out of line (presumably so the method gets its own JIT listing).
    [MethodImpl(MethodImplOptions.NoInlining)]
    private bool NormalMethod(bool? value1, bool? value2, bool? value3)
    {
        return value1.HasValue && value2.HasValue && value3.HasValue;
    }
    
    // Returns the logical AND of the three bool arguments.
    // The `in` modifier passes each bool by read-only reference.
    // NoInlining keeps the call out of line (presumably so the method gets its own JIT listing).
    [MethodImpl(MethodImplOptions.NoInlining)]
    private bool InMethod(in bool value1, in bool value2, in bool value3)
    {
        return value1 && value2 && value3;
    }
    
    // Returns the logical AND of the three bool arguments, each passed by value.
    // NoInlining keeps the call out of line (presumably so the method gets its own JIT listing).
    [MethodImpl(MethodImplOptions.NoInlining)]
    private bool NormalMethod(bool value1, bool value2, bool value3)
    {
        return value1 && value2 && value3;
    }

Current codegen is shown below:

; Assembly listing for method P:InMethod(byref,byref,byref):bool:this
; Emitting BLENDED_CODE for X64 CPU with AVX
; optimized code
; rsp based frame
; partially interruptible
; Final local variable assignments
;
;* V00 this         [V00    ] (  0,  0   )     ref  ->  zero-ref    this class-hnd
;  V01 arg1         [V01,T00] (  4,  4   )   byref  ->  rdx
;  V02 arg2         [V02,T01] (  4,  3   )   byref  ->   r8
;  V03 arg3         [V03,T02] (  4,  3   )   byref  ->   r9
;* V04 loc0         [V04    ] (  0,  0   )  struct ( 8) zero-ref    ld-addr-op
;  V05 tmp0         [V05,T03] (  6,  4   )    bool  ->  rax         V04.hasValue(offs=0x00) P-INDEP
;* V06 tmp1         [V06    ] (  0,  0   )    bool  ->  zero-ref    V04.value(offs=0x01) P-INDEP
;# V07 OutArgs      [V07    ] (  1,  1   )  lclBlk ( 0) [rsp+0x00]
;
; Lcl frame size = 0

; Body of InMethod(byref,byref,byref) where each byref points at a struct with
; hasValue at offset 0 and value at offset 1 (per the V04/V05/V06 entries in the
; local table) -- presumably the `in bool?` overload.
; arg1/arg2/arg3 arrive in rdx/r8/r9; the bool result is left in rax.
; IG01 is empty: the listing reports prolog size 0.
G_M21426_IG01:

G_M21426_IG02:
       ; arg1: load the hasValue byte; zero means "return false" via IG04
       0FB602               movzx    rax, byte  ptr [rdx]
       0FB65201             movzx    rdx, byte  ptr [rdx+1]  // not needed
       84C0                 test     al, al
       741A                 je       SHORT G_M21426_IG04
       ; arg2: same check against the byte at [r8]
       410FB600             movzx    rax, byte  ptr [r8]
       410FB65001           movzx    rdx, byte  ptr [r8+1]   // not needed
       84C0                 test     al, al
       740D                 je       SHORT G_M21426_IG04
       ; arg3: its hasValue byte is the final result, so no branch is needed
       410FB601             movzx    rax, byte  ptr [r9]
       410FB65101           movzx    rdx, byte  ptr [r9+1]   // not needed
       ; NOTE(review): rax was already zero-extended by the load from [r9] above,
       ; so this second widening looks redundant too.
       0FB6C0               movzx    rax, al

G_M21426_IG03:
       C3                   ret

; Early exit: one of the first two hasValue bytes was zero, so return 0 (false).
G_M21426_IG04:
       33C0                 xor      eax, eax

G_M21426_IG05:
       C3                   ret

; Total bytes of code 40, prolog size 0 for method P:InMethod(byref,byref,byref):bool:this
; ============================================================
; Assembly listing for method P:NormalMethod(struct,struct,struct):bool:this
; Emitting BLENDED_CODE for X64 CPU with AVX
; optimized code
; rsp based frame
; partially interruptible
; Final local variable assignments
;
;* V00 this         [V00    ] (  0,  0   )     ref  ->  zero-ref    this class-hnd
;  V01 arg1         [V01,T00] (  3,  3   )  struct ( 8) [rsp+0x10]   do-not-enreg[SF] ld-addr-op
;  V02 arg2         [V02,T01] (  3,  2.50)  struct ( 8) [rsp+0x18]   do-not-enreg[SF] ld-addr-op
;  V03 arg3         [V03,T02] (  3,  2.50)  struct ( 8) [rsp+0x20]   do-not-enreg[SF] ld-addr-op
;# V04 OutArgs      [V04    ] (  1,  1   )  lclBlk ( 0) [rsp+0x00]
;
; Lcl frame size = 0

; Body of NormalMethod(struct,struct,struct). The local table marks the three
; 8-byte struct arguments do-not-enreg, so the incoming rdx/r8/r9 are first
; stored to [rsp+10H]/[rsp+18H]/[rsp+20H] and then read back one byte at a time.
; With a local frame size of 0 these slots lie above rsp -- presumably the
; caller-allocated home slots of the Microsoft x64 convention; confirm.
G_M43454_IG01:
       4889542410           mov      qword ptr [rsp+10H], rdx   // could avoid these stores
       4C89442418           mov      qword ptr [rsp+18H], r8
       4C894C2420           mov      qword ptr [rsp+20H], r9

; NOTE(review): each slot tested below is just the stored copy of rdx/r8/r9, so
; the byte examined is the low byte of that register (dl / r8b / r9b). A
; register-only form would test the register itself rather than load through
; it -- confirm the intent of the "could be [reg]" annotations.
G_M43454_IG02:
       ; first byte of arg1 == 0 -> return false via IG04
       807C241000           cmp      byte  ptr [rsp+10H], 0   // could be [rdx]
       740D                 je       SHORT G_M43454_IG04
       ; first byte of arg2 == 0 -> return false via IG04
       807C241800           cmp      byte  ptr [rsp+18H], 0   // could be [r8]
       7406                 je       SHORT G_M43454_IG04
       ; first byte of arg3 is the final result, zero-extended into rax
       0FB6442420           movzx    rax, byte  ptr [rsp+20H]   // could be [r9]

G_M43454_IG03:
       C3                   ret

; Early exit: one of the first two tested bytes was zero, so return 0 (false).
G_M43454_IG04:
       33C0                 xor      eax, eax

G_M43454_IG05:
       C3                   ret

; Total bytes of code 38, prolog size 0 for method P:NormalMethod(struct,struct,struct):bool:this
; ============================================================
; Assembly listing for method P:InMethod(byref,byref,byref):bool:this
; Emitting BLENDED_CODE for X64 CPU with AVX
; optimized code
; rsp based frame
; partially interruptible
; Final local variable assignments
;
;* V00 this         [V00    ] (  0,  0   )     ref  ->  zero-ref    this class-hnd
;  V01 arg1         [V01,T00] (  3,  3   )   byref  ->  rdx
;  V02 arg2         [V02,T01] (  3,  3   )   byref  ->   r8
;  V03 arg3         [V03,T02] (  3,  3   )   byref  ->   r9
;# V04 OutArgs      [V04    ] (  1,  1   )  lclBlk ( 0) [rsp+0x00]
;
; Lcl frame size = 0

; Body of InMethod(byref,byref,byref) with no promoted struct locals in the
; local table -- presumably the `in bool` overload.
; arg1/arg2/arg3 are pointers in rdx/r8/r9; the bool result is left in rax.
; The sequence is branch-free: all three bytes are loaded and ANDed together,
; with no conditional jumps.
; IG01 is empty: the listing reports prolog size 0.
G_M21423_IG01:

G_M21423_IG02:
       ; eax = *arg1 (zero-extended)
       0FB602               movzx    rax, byte  ptr [rdx]
       ; rdx is reused as scratch once arg1 has been read
       410FB610             movzx    rdx, byte  ptr [r8]
       23C2                 and      eax, edx
       410FB611             movzx    rdx, byte  ptr [r9]
       23C2                 and      eax, edx

G_M21423_IG03:
       C3                   ret

; Total bytes of code 16, prolog size 0 for method P:InMethod(byref,byref,byref):bool:this
; ============================================================
; Assembly listing for method P:NormalMethod(bool,bool,bool):bool:this
; Emitting BLENDED_CODE for X64 CPU with AVX
; optimized code
; rsp based frame
; partially interruptible
; Final local variable assignments
;
;* V00 this         [V00    ] (  0,  0   )     ref  ->  zero-ref    this class-hnd
;  V01 arg1         [V01,T00] (  3,  3   )    bool  ->  rdx
;  V02 arg2         [V02,T01] (  3,  3   )    bool  ->   r8
;  V03 arg3         [V03,T02] (  3,  3   )    bool  ->   r9
;# V04 OutArgs      [V04    ] (  1,  1   )  lclBlk ( 0) [rsp+0x00]
;
; Lcl frame size = 0

; Body of NormalMethod(bool,bool,bool): the three bools arrive in the low bytes
; of rdx, r8 and r9 (per the local table); the bool result is left in rax.
; Each argument is zero-extended to full width before the 32-bit ANDs.
; IG01 is empty: the listing reports prolog size 0.
G_M43451_IG01:

G_M43451_IG02:
       0FB6C2               movzx    rax, dl    // could avoid widening here
       410FB6D0             movzx    rdx, r8b   // and here
       23C2                 and      eax, edx
       410FB6D1             movzx    rdx, r9b   // and here
       23C2                 and      eax, edx
       ; NOTE(review): both AND operands were zero-extended bytes, so the upper
       ; bits of rax are already zero here and this final widening looks redundant.
       0FB6C0               movzx    rax, al

G_M43451_IG03:
       C3                   ret

; Total bytes of code 19, prolog size 0 for method P:NormalMethod(bool,bool,bool):bool:this

This last method's codegen would ideally be:

; Proposed sequence for NormalMethod(bool,bool,bool); the arguments arrive in
; the low bytes of rdx, r8 and r9 (dl, r8b, r9b) and the result is returned in rax.
mov al, dl          ; al = value1
and al, r8b         ; al &= value2
and al, r9b         ; al &= value3
movzx rax, al       ; byte writes leave the upper bits of rax untouched, so widen once for the return
ret

category:cq
theme:basic-cq
skill-level:intermediate
cost:medium

Metadata

Metadata

Assignees

No one assigned

    Labels

    JitUntriaged (CLR JIT issues needing additional triage); area-CodeGen-coreclr (CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI); optimization

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions