Remove AO from a couple of SpanHelpers methods #85819

EgorBo · 2023-05-05T12:36:56Z

Contributes to #85791 (removes a couple of methods jitted during Hello World start).

   3: JIT compiled System.SpanHelpers:IndexOfNullCharacter(ulong) [Tier1, IL size=805, code size=391]
   9: JIT compiled System.SpanHelpers:IndexOfNullByte(ulong) [Tier1, IL size=844, code size=459]
  25: JIT compiled System.SpanHelpers:SequenceCompareTo(byref,int,byref,int) [Tier1, IL size=568, code size=329]

I don't see a good reason for these to have [AO] (i.e. [MethodImpl(MethodImplOptions.AggressiveOptimization)]); the R2R'd versions look good enough to me (SSE based):

R2R codegen:

; Assembly listing for method System.SpanHelpers:IndexOfNullCharacter(ulong):int
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; ReadyToRun compilation
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 3 single block inlinees; 0 inlinees without PGO data

G_M000_IG01:                ;; offset=0000H
       sub      rsp, 40
       vzeroupper

G_M000_IG02:                ;; offset=0007H
       xor      eax, eax
       mov      edx, 0x7FFFFFFF
       test     cl, 1
       jne      SHORT G_M000_IG04

G_M000_IG03:                ;; offset=0013H
       mov      edx, ecx
       neg      edx
       mov      r8d, edx
       shr      r8d, 31
       add      edx, r8d
       sar      edx, 1
       and      rdx, 7

G_M000_IG04:                ;; offset=0027H
       cmp      rdx, 4
       jl       SHORT G_M000_IG06

G_M000_IG05:                ;; offset=002DH
       cmp      word  ptr [rcx+2*rax], 0
       je       G_M000_IG21
       cmp      word  ptr [rcx+2*rax+02H], 0
       je       G_M000_IG20
       cmp      word  ptr [rcx+2*rax+04H], 0
       je       G_M000_IG19
       cmp      word  ptr [rcx+2*rax+06H], 0
       je       G_M000_IG18
       add      rax, 4
       add      rdx, -4
       cmp      rdx, 4
       jge      SHORT G_M000_IG05

G_M000_IG06:                ;; offset=006AH
       test     rdx, rdx
       jle      SHORT G_M000_IG08

G_M000_IG07:                ;; offset=006FH
       cmp      word  ptr [rcx+2*rax], 0
       je       G_M000_IG21
       inc      rax
       dec      rdx
       test     rdx, rdx
       jg       SHORT G_M000_IG07

G_M000_IG08:                ;; offset=0085H
       cmp      rax, 0x7FFFFFFF
       jge      G_M000_IG22
       lea      rdx, [rcx+2*rax]
       test     dl, 31
       je       SHORT G_M000_IG11

G_M000_IG09:                ;; offset=009AH
       vxorps   xmm0, xmm0, xmm0
       vpcmpeqw xmm0, xmm0, xmmword ptr [rcx+2*rax]
       vpmovmskb edx, xmm0
       test     edx, edx
       jne      SHORT G_M000_IG10
       add      rax, 8
       jmp      SHORT G_M000_IG11

G_M000_IG10:                ;; offset=00B1H
       xor      ecx, ecx
       tzcnt    ecx, edx
       shr      ecx, 1
       mov      edx, ecx
       add      eax, edx
       jmp      G_M000_IG21

G_M000_IG11:                ;; offset=00C2H
       mov      rdx, rax
       neg      rdx
       add      rdx, 0x7FFFFFFF
       and      rdx, -16
       jle      SHORT G_M000_IG13

G_M000_IG12:                ;; offset=00D5H
       vxorps   ymm0, ymm0, ymm0
       vpcmpeqw ymm0, ymm0, ymmword ptr [rcx+2*rax]
       vpmovmskb r8d, ymm0
       test     r8d, r8d
       jne      SHORT G_M000_IG15
       add      rax, 16
       add      rdx, -16
       test     rdx, rdx
       jg       SHORT G_M000_IG12

G_M000_IG13:                ;; offset=00F4H
       mov      r8, rax
       neg      r8
       add      r8, 0x7FFFFFFF
       and      r8, -8
       jle      SHORT G_M000_IG17

G_M000_IG14:                ;; offset=0107H
       vxorps   xmm0, xmm0, xmm0
       vpcmpeqw xmm0, xmm0, xmmword ptr [rcx+2*rax]
       vpmovmskb edx, xmm0
       test     edx, edx
       jne      SHORT G_M000_IG16
       add      rax, 8
       jmp      SHORT G_M000_IG17

G_M000_IG15:                ;; offset=011EH
       xor      edx, edx
       tzcnt    edx, r8d
       shr      edx, 1
       mov      ecx, edx
       add      eax, ecx
       jmp      SHORT G_M000_IG21

G_M000_IG16:                ;; offset=012DH
       tzcnt    edx, edx
       shr      edx, 1
       add      eax, edx
       jmp      SHORT G_M000_IG21

G_M000_IG17:                ;; offset=0137H
       cmp      rax, 0x7FFFFFFF
       jge      SHORT G_M000_IG22
       mov      rdx, rax
       neg      rdx
       add      rdx, 0x7FFFFFFF
       jmp      G_M000_IG04

G_M000_IG18:                ;; offset=0151H
       add      eax, 3
       jmp      SHORT G_M000_IG21

G_M000_IG19:                ;; offset=0156H
       add      eax, 2
       jmp      SHORT G_M000_IG21

G_M000_IG20:                ;; offset=015BH
       inc      eax

G_M000_IG21:                ;; offset=015DH
       vzeroupper
       add      rsp, 40
       ret

G_M000_IG22:                ;; offset=0165H
       call     [System.SpanHelpers:ThrowMustBeNullTerminatedString()]
       int3

; Total bytes of code 364



; Assembly listing for method System.SpanHelpers:IndexOfNullByte(ulong):int
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; ReadyToRun compilation
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 3 single block inlinees; 0 inlinees without PGO data

G_M000_IG01:                ;; offset=0000H
       sub      rsp, 40
       vzeroupper

G_M000_IG02:                ;; offset=0007H
       xor      eax, eax
       mov      edx, ecx
       and      edx, 15
       neg      edx
       add      edx, 16
       and      edx, 15

G_M000_IG03:                ;; offset=0016H
       cmp      rdx, 8
       jb       SHORT G_M000_IG05

G_M000_IG04:                ;; offset=001CH
       add      rdx, -8
       cmp      byte  ptr [rcx+rax], 0
       je       G_M000_IG19
       cmp      byte  ptr [rcx+rax+01H], 0
       je       G_M000_IG20
       cmp      byte  ptr [rcx+rax+02H], 0
       je       G_M000_IG21
       cmp      byte  ptr [rcx+rax+03H], 0
       je       G_M000_IG22
       cmp      byte  ptr [rcx+rax+04H], 0
       je       G_M000_IG23
       cmp      byte  ptr [rcx+rax+05H], 0
       je       G_M000_IG24
       cmp      byte  ptr [rcx+rax+06H], 0
       je       G_M000_IG25
       cmp      byte  ptr [rcx+rax+07H], 0
       je       G_M000_IG26
       add      rax, 8
       cmp      rdx, 8
       jae      SHORT G_M000_IG04

G_M000_IG05:                ;; offset=0081H
       cmp      rdx, 4
       jb       SHORT G_M000_IG07

G_M000_IG06:                ;; offset=0087H
       add      rdx, -4
       cmp      byte  ptr [rcx+rax], 0
       je       G_M000_IG19
       cmp      byte  ptr [rcx+rax+01H], 0
       je       G_M000_IG20
       cmp      byte  ptr [rcx+rax+02H], 0
       je       G_M000_IG21
       cmp      byte  ptr [rcx+rax+03H], 0
       je       G_M000_IG22
       add      rax, 4

G_M000_IG07:                ;; offset=00BAH
       test     rdx, rdx
       je       SHORT G_M000_IG09

G_M000_IG08:                ;; offset=00BFH
       dec      rdx
       cmp      byte  ptr [rcx+rax], 0
       je       G_M000_IG19
       inc      rax
       test     rdx, rdx
       jne      SHORT G_M000_IG08

G_M000_IG09:                ;; offset=00D4H
       cmp      rax, 0x7FFFFFFF
       jae      G_M000_IG28
       mov      edx, ecx
       add      rdx, rax
       test     dl, 31
       je       SHORT G_M000_IG11

G_M000_IG10:                ;; offset=00EAH
       vxorps   xmm0, xmm0, xmm0
       vpcmpeqb xmm0, xmm0, xmmword ptr [rcx+rax]
       vpmovmskb edx, xmm0
       test     edx, edx
       jne      SHORT G_M000_IG16
       add      rax, 16

G_M000_IG11:                ;; offset=00FFH
       mov      edx, eax
       neg      edx
       add      edx, 0x7FFFFFFF
       and      edx, -32
       cmp      rdx, rax
       jbe      SHORT G_M000_IG13

G_M000_IG12:                ;; offset=0111H
       vxorps   ymm0, ymm0, ymm0
       vpcmpeqb ymm0, ymm0, ymmword ptr [rcx+rax]
       vpmovmskb r8d, ymm0
       test     r8d, r8d
       jne      SHORT G_M000_IG17
       add      rax, 32
       cmp      rdx, rax
       ja       SHORT G_M000_IG12

G_M000_IG13:                ;; offset=012CH
       mov      edx, eax
       neg      edx
       add      edx, 0x7FFFFFFF
       and      edx, -16
       mov      r8d, edx
       cmp      r8, rax
       jbe      SHORT G_M000_IG15

G_M000_IG14:                ;; offset=0141H
       vxorps   xmm0, xmm0, xmm0
       vpcmpeqb xmm0, xmm0, xmmword ptr [rcx+rax]
       vpmovmskb edx, xmm0
       test     edx, edx
       jne      SHORT G_M000_IG18
       add      rax, 16

G_M000_IG15:                ;; offset=0156H
       cmp      rax, 0x7FFFFFFF
       jae      SHORT G_M000_IG28
       mov      rdx, rax
       neg      rdx
       add      rdx, 0x7FFFFFFF
       jmp      G_M000_IG03

G_M000_IG16:                ;; offset=0170H
       tzcnt    edx, edx
       add      eax, edx
       jmp      SHORT G_M000_IG27

G_M000_IG17:                ;; offset=0178H
       xor      edx, edx
       tzcnt    edx, r8d
       add      eax, edx
       jmp      SHORT G_M000_IG27

G_M000_IG18:                ;; offset=0183H
       tzcnt    edx, edx
       add      eax, edx
       jmp      SHORT G_M000_IG27

G_M000_IG19:                ;; offset=018BH
       jmp      SHORT G_M000_IG27

G_M000_IG20:                ;; offset=018DH
       inc      eax
       jmp      SHORT G_M000_IG27

G_M000_IG21:                ;; offset=0191H
       add      eax, 2
       jmp      SHORT G_M000_IG27

G_M000_IG22:                ;; offset=0196H
       add      eax, 3
       jmp      SHORT G_M000_IG27

G_M000_IG23:                ;; offset=019BH
       add      eax, 4
       jmp      SHORT G_M000_IG27

G_M000_IG24:                ;; offset=01A0H
       add      eax, 5
       jmp      SHORT G_M000_IG27

G_M000_IG25:                ;; offset=01A5H
       add      eax, 6
       jmp      SHORT G_M000_IG27

G_M000_IG26:                ;; offset=01AAH
       add      eax, 7

G_M000_IG27:                ;; offset=01ADH
       vzeroupper
       add      rsp, 40
       ret

G_M000_IG28:                ;; offset=01B5H
       call     [System.SpanHelpers:ThrowMustBeNullTerminatedString()]
       int3

; Total bytes of code 444

ghost · 2023-05-05T12:37:12Z

Tagging subscribers to this area: @dotnet/area-system-memory
See info in area-owners.md if you want to be subscribed.

Issue Details

Contributes to #84421 (removes a couple of methods jitted during Hello World start).

I don't see a good reason for these to have [AO], R2R'd versions look good enough to me (SSE based):

R2R codegen:

; Assembly listing for method System.SpanHelpers:IndexOfNullCharacter(ulong):int
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; ReadyToRun compilation
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 3 single block inlinees; 0 inlinees without PGO data

G_M000_IG01:                ;; offset=0000H
       sub      rsp, 40
       vzeroupper

G_M000_IG02:                ;; offset=0007H
       xor      eax, eax
       mov      edx, 0x7FFFFFFF
       test     cl, 1
       jne      SHORT G_M000_IG04

G_M000_IG03:                ;; offset=0013H
       mov      edx, ecx
       neg      edx
       mov      r8d, edx
       shr      r8d, 31
       add      edx, r8d
       sar      edx, 1
       and      rdx, 7

G_M000_IG04:                ;; offset=0027H
       cmp      rdx, 4
       jl       SHORT G_M000_IG06

G_M000_IG05:                ;; offset=002DH
       cmp      word  ptr [rcx+2*rax], 0
       je       G_M000_IG21
       cmp      word  ptr [rcx+2*rax+02H], 0
       je       G_M000_IG20
       cmp      word  ptr [rcx+2*rax+04H], 0
       je       G_M000_IG19
       cmp      word  ptr [rcx+2*rax+06H], 0
       je       G_M000_IG18
       add      rax, 4
       add      rdx, -4
       cmp      rdx, 4
       jge      SHORT G_M000_IG05

G_M000_IG06:                ;; offset=006AH
       test     rdx, rdx
       jle      SHORT G_M000_IG08

G_M000_IG07:                ;; offset=006FH
       cmp      word  ptr [rcx+2*rax], 0
       je       G_M000_IG21
       inc      rax
       dec      rdx
       test     rdx, rdx
       jg       SHORT G_M000_IG07

G_M000_IG08:                ;; offset=0085H
       cmp      rax, 0x7FFFFFFF
       jge      G_M000_IG22
       lea      rdx, [rcx+2*rax]
       test     dl, 31
       je       SHORT G_M000_IG11

G_M000_IG09:                ;; offset=009AH
       vxorps   xmm0, xmm0, xmm0
       vpcmpeqw xmm0, xmm0, xmmword ptr [rcx+2*rax]
       vpmovmskb edx, xmm0
       test     edx, edx
       jne      SHORT G_M000_IG10
       add      rax, 8
       jmp      SHORT G_M000_IG11

G_M000_IG10:                ;; offset=00B1H
       xor      ecx, ecx
       tzcnt    ecx, edx
       shr      ecx, 1
       mov      edx, ecx
       add      eax, edx
       jmp      G_M000_IG21

G_M000_IG11:                ;; offset=00C2H
       mov      rdx, rax
       neg      rdx
       add      rdx, 0x7FFFFFFF
       and      rdx, -16
       jle      SHORT G_M000_IG13

G_M000_IG12:                ;; offset=00D5H
       vxorps   ymm0, ymm0, ymm0
       vpcmpeqw ymm0, ymm0, ymmword ptr [rcx+2*rax]
       vpmovmskb r8d, ymm0
       test     r8d, r8d
       jne      SHORT G_M000_IG15
       add      rax, 16
       add      rdx, -16
       test     rdx, rdx
       jg       SHORT G_M000_IG12

G_M000_IG13:                ;; offset=00F4H
       mov      r8, rax
       neg      r8
       add      r8, 0x7FFFFFFF
       and      r8, -8
       jle      SHORT G_M000_IG17

G_M000_IG14:                ;; offset=0107H
       vxorps   xmm0, xmm0, xmm0
       vpcmpeqw xmm0, xmm0, xmmword ptr [rcx+2*rax]
       vpmovmskb edx, xmm0
       test     edx, edx
       jne      SHORT G_M000_IG16
       add      rax, 8
       jmp      SHORT G_M000_IG17

G_M000_IG15:                ;; offset=011EH
       xor      edx, edx
       tzcnt    edx, r8d
       shr      edx, 1
       mov      ecx, edx
       add      eax, ecx
       jmp      SHORT G_M000_IG21

G_M000_IG16:                ;; offset=012DH
       tzcnt    edx, edx
       shr      edx, 1
       add      eax, edx
       jmp      SHORT G_M000_IG21

G_M000_IG17:                ;; offset=0137H
       cmp      rax, 0x7FFFFFFF
       jge      SHORT G_M000_IG22
       mov      rdx, rax
       neg      rdx
       add      rdx, 0x7FFFFFFF
       jmp      G_M000_IG04

G_M000_IG18:                ;; offset=0151H
       add      eax, 3
       jmp      SHORT G_M000_IG21

G_M000_IG19:                ;; offset=0156H
       add      eax, 2
       jmp      SHORT G_M000_IG21

G_M000_IG20:                ;; offset=015BH
       inc      eax

G_M000_IG21:                ;; offset=015DH
       vzeroupper
       add      rsp, 40
       ret

G_M000_IG22:                ;; offset=0165H
       call     [System.SpanHelpers:ThrowMustBeNullTerminatedString()]
       int3

; Total bytes of code 364



; Assembly listing for method System.SpanHelpers:IndexOfNullByte(ulong):int
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; ReadyToRun compilation
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 3 single block inlinees; 0 inlinees without PGO data

G_M000_IG01:                ;; offset=0000H
       sub      rsp, 40
       vzeroupper

G_M000_IG02:                ;; offset=0007H
       xor      eax, eax
       mov      edx, ecx
       and      edx, 15
       neg      edx
       add      edx, 16
       and      edx, 15

G_M000_IG03:                ;; offset=0016H
       cmp      rdx, 8
       jb       SHORT G_M000_IG05

G_M000_IG04:                ;; offset=001CH
       add      rdx, -8
       cmp      byte  ptr [rcx+rax], 0
       je       G_M000_IG19
       cmp      byte  ptr [rcx+rax+01H], 0
       je       G_M000_IG20
       cmp      byte  ptr [rcx+rax+02H], 0
       je       G_M000_IG21
       cmp      byte  ptr [rcx+rax+03H], 0
       je       G_M000_IG22
       cmp      byte  ptr [rcx+rax+04H], 0
       je       G_M000_IG23
       cmp      byte  ptr [rcx+rax+05H], 0
       je       G_M000_IG24
       cmp      byte  ptr [rcx+rax+06H], 0
       je       G_M000_IG25
       cmp      byte  ptr [rcx+rax+07H], 0
       je       G_M000_IG26
       add      rax, 8
       cmp      rdx, 8
       jae      SHORT G_M000_IG04

G_M000_IG05:                ;; offset=0081H
       cmp      rdx, 4
       jb       SHORT G_M000_IG07

G_M000_IG06:                ;; offset=0087H
       add      rdx, -4
       cmp      byte  ptr [rcx+rax], 0
       je       G_M000_IG19
       cmp      byte  ptr [rcx+rax+01H], 0
       je       G_M000_IG20
       cmp      byte  ptr [rcx+rax+02H], 0
       je       G_M000_IG21
       cmp      byte  ptr [rcx+rax+03H], 0
       je       G_M000_IG22
       add      rax, 4

G_M000_IG07:                ;; offset=00BAH
       test     rdx, rdx
       je       SHORT G_M000_IG09

G_M000_IG08:                ;; offset=00BFH
       dec      rdx
       cmp      byte  ptr [rcx+rax], 0
       je       G_M000_IG19
       inc      rax
       test     rdx, rdx
       jne      SHORT G_M000_IG08

G_M000_IG09:                ;; offset=00D4H
       cmp      rax, 0x7FFFFFFF
       jae      G_M000_IG28
       mov      edx, ecx
       add      rdx, rax
       test     dl, 31
       je       SHORT G_M000_IG11

G_M000_IG10:                ;; offset=00EAH
       vxorps   xmm0, xmm0, xmm0
       vpcmpeqb xmm0, xmm0, xmmword ptr [rcx+rax]
       vpmovmskb edx, xmm0
       test     edx, edx
       jne      SHORT G_M000_IG16
       add      rax, 16

G_M000_IG11:                ;; offset=00FFH
       mov      edx, eax
       neg      edx
       add      edx, 0x7FFFFFFF
       and      edx, -32
       cmp      rdx, rax
       jbe      SHORT G_M000_IG13

G_M000_IG12:                ;; offset=0111H
       vxorps   ymm0, ymm0, ymm0
       vpcmpeqb ymm0, ymm0, ymmword ptr [rcx+rax]
       vpmovmskb r8d, ymm0
       test     r8d, r8d
       jne      SHORT G_M000_IG17
       add      rax, 32
       cmp      rdx, rax
       ja       SHORT G_M000_IG12

G_M000_IG13:                ;; offset=012CH
       mov      edx, eax
       neg      edx
       add      edx, 0x7FFFFFFF
       and      edx, -16
       mov      r8d, edx
       cmp      r8, rax
       jbe      SHORT G_M000_IG15

G_M000_IG14:                ;; offset=0141H
       vxorps   xmm0, xmm0, xmm0
       vpcmpeqb xmm0, xmm0, xmmword ptr [rcx+rax]
       vpmovmskb edx, xmm0
       test     edx, edx
       jne      SHORT G_M000_IG18
       add      rax, 16

G_M000_IG15:                ;; offset=0156H
       cmp      rax, 0x7FFFFFFF
       jae      SHORT G_M000_IG28
       mov      rdx, rax
       neg      rdx
       add      rdx, 0x7FFFFFFF
       jmp      G_M000_IG03

G_M000_IG16:                ;; offset=0170H
       tzcnt    edx, edx
       add      eax, edx
       jmp      SHORT G_M000_IG27

G_M000_IG17:                ;; offset=0178H
       xor      edx, edx
       tzcnt    edx, r8d
       add      eax, edx
       jmp      SHORT G_M000_IG27

G_M000_IG18:                ;; offset=0183H
       tzcnt    edx, edx
       add      eax, edx
       jmp      SHORT G_M000_IG27

G_M000_IG19:                ;; offset=018BH
       jmp      SHORT G_M000_IG27

G_M000_IG20:                ;; offset=018DH
       inc      eax
       jmp      SHORT G_M000_IG27

G_M000_IG21:                ;; offset=0191H
       add      eax, 2
       jmp      SHORT G_M000_IG27

G_M000_IG22:                ;; offset=0196H
       add      eax, 3
       jmp      SHORT G_M000_IG27

G_M000_IG23:                ;; offset=019BH
       add      eax, 4
       jmp      SHORT G_M000_IG27

G_M000_IG24:                ;; offset=01A0H
       add      eax, 5
       jmp      SHORT G_M000_IG27

G_M000_IG25:                ;; offset=01A5H
       add      eax, 6
       jmp      SHORT G_M000_IG27

G_M000_IG26:                ;; offset=01AAH
       add      eax, 7

G_M000_IG27:                ;; offset=01ADH
       vzeroupper
       add      rsp, 40
       ret

G_M000_IG28:                ;; offset=01B5H
       call     [System.SpanHelpers:ThrowMustBeNullTerminatedString()]
       int3

; Total bytes of code 444

Author:	EgorBo
Assignees:	EgorBo
Labels:	`area-System.Memory`
Milestone:	-

EgorBo · 2023-05-05T12:42:55Z

Also removed [AO] from SequenceCompareTo(ref byte first, int firstLength, ref byte second, int secondLength); its R2R codegen:

; Assembly listing for method System.SpanHelpers:SequenceCompareTo(byref,int,byref,int):int
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; ReadyToRun compilation
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 7 single block inlinees; 0 inlinees without PGO data

G_M000_IG01:                ;; offset=0000H
       push     rsi
       vzeroupper

G_M000_IG02:                ;; offset=0004H
       cmp      rcx, r8
       je       G_M000_IG16

G_M000_IG03:                ;; offset=000DH
       cmp      edx, r9d
       mov      eax, r9d
       cmovb    eax, edx
       mov      r10d, eax
       xor      r11d, r11d
       mov      rax, r10
       cmp      rax, 32
       jb       SHORT G_M000_IG08
       add      rax, -32
       je       SHORT G_M000_IG05

G_M000_IG04:                ;; offset=002BH
       vmovups  ymm0, ymmword ptr [rcx+r11]
       vpcmpeqb ymm0, ymm0, ymmword ptr [r8+r11]
       vpmovmskb r10d, ymm0
       cmp      r10d, -1
       jne      SHORT G_M000_IG06
       add      r11, 32
       cmp      rax, r11
       ja       SHORT G_M000_IG04

G_M000_IG05:                ;; offset=004AH
       mov      r11, rax
       vmovups  ymm0, ymmword ptr [rcx+r11]
       vpcmpeqb ymm0, ymm0, ymmword ptr [r8+r11]
       vpmovmskb r10d, ymm0
       cmp      r10d, -1
       je       G_M000_IG16

G_M000_IG06:                ;; offset=0067H
       mov      eax, r10d
       not      eax
       tzcnt    eax, eax
       add      rax, r11
       mov      r11, rax
       movzx    rax, byte  ptr [rcx+r11]
       movzx    rcx, byte  ptr [r8+r11]
       sub      eax, ecx

G_M000_IG07:                ;; offset=0082H
       vzeroupper
       pop      rsi
       ret

G_M000_IG08:                ;; offset=0087H
       cmp      r10, 16
       jb       SHORT G_M000_IG12
       add      rax, -16
       je       SHORT G_M000_IG09
       vmovups  xmm0, xmmword ptr [rcx]
       vpcmpeqb xmm0, xmm0, xmmword ptr [r8]
       vpmovmskb r10d, xmm0
       cmp      r10d, 0xFFFF
       jne      SHORT G_M000_IG10

G_M000_IG09:                ;; offset=00A9H
       mov      r11, rax
       vmovups  xmm0, xmmword ptr [rcx+r11]
       vpcmpeqb xmm0, xmm0, xmmword ptr [r8+r11]
       vpmovmskb r10d, xmm0
       cmp      r10d, 0xFFFF
       je       SHORT G_M000_IG16

G_M000_IG10:                ;; offset=00C5H
       mov      eax, r10d
       not      eax
       tzcnt    eax, eax
       add      rax, r11
       mov      r11, rax
       movzx    rax, byte  ptr [rcx+r11]
       movzx    rcx, byte  ptr [r8+r11]
       sub      eax, ecx

G_M000_IG11:                ;; offset=00E0H
       vzeroupper
       pop      rsi
       ret

G_M000_IG12:                ;; offset=00E5H
       cmp      r10, 8
       jbe      SHORT G_M000_IG14
       lea      rax, [r10-08H]
       test     rax, rax
       je       SHORT G_M000_IG14

G_M000_IG13:                ;; offset=00F4H
       mov      rsi, qword ptr [rcx+r11]
       cmp      rsi, qword ptr [r8+r11]
       jne      SHORT G_M000_IG14
       add      r11, 8
       cmp      rax, r11
       ja       SHORT G_M000_IG13

G_M000_IG14:                ;; offset=0107H
       cmp      r10, r11
       jbe      SHORT G_M000_IG16

G_M000_IG15:                ;; offset=010CH
       movzx    rax, byte  ptr [rcx+r11]
       movzx    rsi, byte  ptr [r8+r11]
       sub      eax, esi
       jne      SHORT G_M000_IG18
       inc      r11
       cmp      r10, r11
       ja       SHORT G_M000_IG15

G_M000_IG16:                ;; offset=0122H
       mov      eax, edx
       sub      eax, r9d

G_M000_IG17:                ;; offset=0127H
       vzeroupper
       pop      rsi
       ret

G_M000_IG18:                ;; offset=012CH
       vzeroupper
       pop      rsi
       ret

; Total bytes of code 305

stephentoub · 2023-05-05T12:52:05Z

Also contributes to #71261

jkotas · 2023-05-05T13:50:08Z

src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs

@@ -422,7 +422,6 @@ public static unsafe int SequenceCompareTo(ref char first, int firstLength, ref

        // IndexOfNullCharacter processes memory in aligned chunks, and thus it won't crash even if it accesses memory beyond the null terminator.
        // This behavior is an implementation detail of the runtime and callers outside System.Private.CoreLib must not depend on it.
-        [MethodImpl(MethodImplOptions.AggressiveOptimization)]


There are some more SpanHelpers methods marked with AggressiveOptimization. Delete it on all of them?

There are some more SpanHelpers methods marked with AggressiveOptimization. Delete it on all of them?

There are a few cases where, because of SVM, we get non-optimal codegen in R2R (similar to #84421 (comment)), so I didn't want to regress SpanHelpers.

E.g. System.SpanHelpers:LastIndexOfValueType[short,System.SpanHelpers+DontNegate`1[short]](byref,short,int)

So only *IndexOfAnyValue* (with generic math) are left with AggressiveOptimization in SpanHelpers

Both R2R and Tier0 codegen for these complicated generic constructs tend to be pretty bad. I would not worry about it - we have the same problem in a number of other places.

E.g. if I remove [AO] from LastIndexOfValueType here is what I get:

; Assembly listing for method System.SpanHelpers:LastIndexOfValueType[short](byref,short,int):int
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; ReadyToRun compilation
; optimized code
; rsp based frame
; fully interruptible
; No PGO data

G_M000_IG01:                ;; offset=0000H

G_M000_IG02:                ;; offset=0000H
       movsx    rdx, dx
       lea      rax, [(reloc 0x435488)]

G_M000_IG03:                ;; offset=000BH
       tail.jmp [rax]System.SpanHelpers:LastIndexOfValueType[short,System.SpanHelpers+DontNegate`1[short]](byref,short,int):int

; Total bytes of code 14

and that nested LastIndexOfValueType is JIT-compiled, so presumably we'll get a slow Tier0 version at startup instead of the AggressiveOptimization one. If that is OK, I can remove the attribute.

I think it is ok.

OK, removed them. Thus, we only have three uses of AO left in CoreLib: one in AsyncTaskMethodBuilder, with a comment saying that AO helps it avoid allocations in Tier0, and two in CastHelpers, which have to be there since the VM special-cases them to be direct calls.

kunalspathak · 2023-05-09T16:38:47Z

@EgorBo - do you know why AO was cause for jitting these methods on startup for x64 and not for arm64?

stephentoub · 2023-05-09T16:41:01Z

do you know why AO was cause for jitting these methods on startup for x64 and not for arm64?

I think the comparison was apples vs oranges... they were running different code because EventSource startup gunk was being invoked in one case and not the other.

EgorBo · 2023-05-09T16:41:19Z

@EgorBo - do you know why AO was cause for jitting these methods on startup for x64 and not for arm64?

I'd say it is #85791 (comment)
I also saw that from stack traces in JIT's compileMethod

kunalspathak · 2023-05-09T16:43:21Z

Got it. Yes @TIHan also confirmed that in #85791 (comment).

ghost assigned EgorBo May 5, 2023

dotnet-issue-labeler bot added the area-System.Memory label May 5, 2023

Remove more attributes

4c583a2

EgorBo force-pushed the remove-ao-spanhelpers branch from a2dea04 to 4c583a2 Compare May 5, 2023 12:45

EgorBo mentioned this pull request May 5, 2023

X64 - Too many methods JITted on startup for blank console application #85791

Open

6 tasks

jkotas reviewed May 5, 2023

View reviewed changes

Remove more

4fd5a24

jkotas approved these changes May 5, 2023

View reviewed changes

Remove more

635e5df

EgorBo merged commit 9abc5a9 into dotnet:main May 5, 2023

EgorBo deleted the remove-ao-spanhelpers branch May 5, 2023 16:21

jkotas added the tenet-performance Performance related issue label May 5, 2023

ghost locked as resolved and limited conversation to collaborators Jun 8, 2023

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Remove AO from a couple of SpanHelpers methods #85819

Remove AO from a couple of SpanHelpers methods #85819

EgorBo commented May 5, 2023 •

edited

Loading

ghost commented May 5, 2023

EgorBo commented May 5, 2023

stephentoub commented May 5, 2023

jkotas May 5, 2023

EgorBo May 5, 2023

EgorBo May 5, 2023

EgorBo May 5, 2023

jkotas May 5, 2023 •

edited

Loading

EgorBo May 5, 2023

jkotas May 5, 2023

EgorBo May 5, 2023 •

edited

Loading

kunalspathak commented May 9, 2023

stephentoub commented May 9, 2023

EgorBo commented May 9, 2023

kunalspathak commented May 9, 2023

Remove AO from a couple of SpanHelpers methods #85819

Remove AO from a couple of SpanHelpers methods #85819

Conversation

EgorBo commented May 5, 2023 • edited Loading

ghost commented May 5, 2023

EgorBo commented May 5, 2023

stephentoub commented May 5, 2023

jkotas May 5, 2023

Choose a reason for hiding this comment

EgorBo May 5, 2023

Choose a reason for hiding this comment

EgorBo May 5, 2023

Choose a reason for hiding this comment

EgorBo May 5, 2023

Choose a reason for hiding this comment

jkotas May 5, 2023 • edited Loading

Choose a reason for hiding this comment

EgorBo May 5, 2023

Choose a reason for hiding this comment

jkotas May 5, 2023

Choose a reason for hiding this comment

EgorBo May 5, 2023 • edited Loading

Choose a reason for hiding this comment

kunalspathak commented May 9, 2023

stephentoub commented May 9, 2023

EgorBo commented May 9, 2023

kunalspathak commented May 9, 2023

EgorBo commented May 5, 2023 •

edited

Loading

jkotas May 5, 2023 •

edited

Loading

EgorBo May 5, 2023 •

edited

Loading