Improve performance for Math.Abs #15823
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -278,19 +278,17 @@ private void SetBits(int[] bits) | |
{ | ||
if (bits == null) | ||
throw new ArgumentNullException(nameof(bits)); | ||
if (bits.Length == 4) | ||
if (bits.Length != 4) | ||
ThrowInvalidDecimalBytes(); | ||
int f = bits[3]; | ||
if ((f & ~(SignMask | ScaleMask)) == 0 && (f & ScaleMask) <= (28 << 16)) | ||
{ | ||
int f = bits[3]; | ||
if ((f & ~(SignMask | ScaleMask)) == 0 && (f & ScaleMask) <= (28 << 16)) | ||
{ | ||
lo = bits[0]; | ||
mid = bits[1]; | ||
hi = bits[2]; | ||
flags = f; | ||
return; | ||
} | ||
lo = bits[0]; | ||
mid = bits[1]; | ||
hi = bits[2]; | ||
flags = f; | ||
return; | ||
} | ||
throw new ArgumentException(SR.Arg_DecBitCtor); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Where did this exception go? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would put the decimal optimization into separate PR. They are pretty unrelated to the Abs change. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It was to improve the ; Assembly listing for method Math:Abs(struct):struct
; Emitting BLENDED_CODE for X64 CPU with SSE2
; optimized code
; rsp based frame
; partially interruptible
; Final local variable assignments
;
; V00 RetBuf [V00,T01] ( 7, 7 ) byref -> rcx
; V01 arg0 [V01,T00] ( 7, 12 ) byref -> rdx
;* V02 loc0 [V02 ] ( 0, 0 ) struct (16) zero-ref
;* V03 tmp1 [V03 ] ( 0, 0 ) struct (16) zero-ref
; V04 tmp2 [V04,T02] ( 4, 7 ) int -> rax
; V05 tmp3 [V05,T11] ( 3, 0 ) ref -> rsi class-hnd exact
;* V06 tmp4 [V06 ] ( 0, 0 ) int -> zero-ref
;* V07 tmp5 [V07 ] ( 0, 0 ) int -> zero-ref
;* V08 tmp6 [V08 ] ( 0, 0 ) int -> zero-ref
; V09 tmp7 [V09,T03] ( 2, 2 ) int -> rax V02.flags(offs=0x00) P-INDEP
; V10 tmp8 [V10,T04] ( 2, 2 ) int -> r8 V02.hi(offs=0x04) P-INDEP
; V11 tmp9 [V11,T05] ( 2, 2 ) int -> r9 V02.lo(offs=0x08) P-INDEP
; V12 tmp10 [V12,T06] ( 2, 2 ) int -> rdx V02.mid(offs=0x0c) P-INDEP
; V13 tmp11 [V13,T07] ( 2, 2 ) int -> rax V03.flags(offs=0x00) P-INDEP
; V14 tmp12 [V14,T08] ( 2, 2 ) int -> r8 V03.hi(offs=0x04) P-INDEP
; V15 tmp13 [V15,T09] ( 2, 2 ) int -> r9 V03.lo(offs=0x08) P-INDEP
; V16 tmp14 [V16,T10] ( 2, 2 ) int -> rdx V03.mid(offs=0x0c) P-INDEP
; V17 tmp15 [V17,T12] ( 2, 0 ) ref -> rcx
; V18 tmp16 [V18,T13] ( 2, 0 ) ref -> rdx
; V19 OutArgs [V19 ] ( 1, 1 ) lclBlk (32) [rsp+0x00]
;
; Lcl frame size = 32
G_M42161_IG01:
push rsi
sub rsp, 32
G_M42161_IG02:
mov eax, dword ptr [rdx]
mov r8d, dword ptr [rdx+4]
mov r9d, dword ptr [rdx+8]
mov edx, dword ptr [rdx+12]
and eax, 0xD1FFAB1E
test eax, 0xD1FFAB1E
jne G_M42161_IG04
mov r10d, eax
and r10d, 0xD1FFAB1E
cmp r10d, 0xD1FFAB1E
jg G_M42161_IG04
mov dword ptr [rcx], eax
mov dword ptr [rcx+4], r8d
mov dword ptr [rcx+8], r9d
mov dword ptr [rcx+12], edx
mov rax, rcx
G_M42161_IG03:
add rsp, 32
pop rsi
ret
************** Beginning of cold code **************
G_M42161_IG04:
lea rcx, [(reloc)]
call CORINFO_HELP_NEWSFAST
mov rsi, rax
mov ecx, 0x2648
call CORINFO_HELP_STRCNS_CURRENT_MODULE
mov rcx, rax
xor rdx, rdx
call SR:GetResourceString(ref,ref):ref
mov rdx, rax
mov rcx, rsi
call ArgumentException:.ctor(ref):this
mov rcx, rsi
call CORINFO_HELP_THROW
int3
; Total bytes of code 134, prolog size 5 for method Math:Abs(struct):struct
; ============================================================ But seems to have made it worse by making the variables ; Assembly listing for method Math:Abs(struct):struct
; Emitting BLENDED_CODE for X64 CPU with SSE2
; optimized code
; rsp based frame
; partially interruptible
; Final local variable assignments
;
; V00 RetBuf [V00,T01] ( 7, 7 ) byref -> rcx
; V01 arg0 [V01,T00] ( 7, 12 ) byref -> rdx
;* V02 loc0 [V02 ] ( 0, 0 ) struct (16) zero-ref
; V03 tmp1 [V03 ] ( 13, 24 ) struct (16) [rsp+0x28] do-not-enreg[XS] addr-exposed
; V04 tmp2 [V04,T02] ( 4, 7 ) int -> rax
;* V05 tmp3 [V05 ] ( 0, 0 ) int -> zero-ref
;* V06 tmp4 [V06 ] ( 0, 0 ) int -> zero-ref
;* V07 tmp5 [V07 ] ( 0, 0 ) int -> zero-ref
; V08 tmp6 [V08,T03] ( 2, 2 ) int -> rax V02.flags(offs=0x00) P-INDEP
; V09 tmp7 [V09,T04] ( 2, 2 ) int -> r8 V02.hi(offs=0x04) P-INDEP
; V10 tmp8 [V10,T05] ( 2, 2 ) int -> r9 V02.lo(offs=0x08) P-INDEP
; V11 tmp9 [V11,T06] ( 2, 2 ) int -> rdx V02.mid(offs=0x0c) P-INDEP
; V12 tmp10 [V12 ] ( 4, 3 ) int -> [rsp+0x28] do-not-enreg[X] addr-exposed V03.flags(offs=0x00) P-DEP
; V13 tmp11 [V13 ] ( 4, 3 ) int -> [rsp+0x2C] do-not-enreg[X] addr-exposed V03.hi(offs=0x04) P-DEP
; V14 tmp12 [V14 ] ( 4, 3 ) int -> [rsp+0x30] do-not-enreg[X] addr-exposed V03.lo(offs=0x08) P-DEP
; V15 tmp13 [V15 ] ( 4, 3 ) int -> [rsp+0x34] do-not-enreg[X] addr-exposed V03.mid(offs=0x0c) P-DEP
; V16 OutArgs [V16 ] ( 1, 1 ) lclBlk (32) [rsp+0x00]
;
; Lcl frame size = 56
G_M42161_IG01:
sub rsp, 56
nop
G_M42161_IG02:
mov eax, dword ptr [rdx]
mov r8d, dword ptr [rdx+4]
mov r9d, dword ptr [rdx+8]
mov edx, dword ptr [rdx+12]
xor r10d, r10d
mov dword ptr [rsp+28H], r10d
mov dword ptr [rsp+2CH], r10d
mov dword ptr [rsp+30H], r10d
mov dword ptr [rsp+34H], r10d
and eax, 0xD1FFAB1E
test eax, 0xD1FFAB1E
jne G_M42161_IG04
mov r10d, eax
and r10d, 0xD1FFAB1E
cmp r10d, 0xD1FFAB1E
jg G_M42161_IG04
mov dword ptr [rsp+30H], r9d
mov dword ptr [rsp+34H], edx
mov dword ptr [rsp+2CH], r8d
mov dword ptr [rsp+28H], eax
mov eax, dword ptr [rsp+28H]
mov dword ptr [rcx], eax
mov eax, dword ptr [rsp+2CH]
mov dword ptr [rcx+4], eax
mov eax, dword ptr [rsp+30H]
mov dword ptr [rcx+8], eax
mov eax, dword ptr [rsp+34H]
mov dword ptr [rcx+12], eax
mov rax, rcx
G_M42161_IG03:
add rsp, 56
ret
************** Beginning of cold code **************
G_M42161_IG04:
lea rcx, bword ptr [rsp+28H]
call Decimal:ThrowInvalidDecimalBytes():this
int3
; Total bytes of code 144, prolog size 5 for method Math:Abs(struct):struct There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not really sure why, but will revert There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would expect the decimal improvement will grow once it is fine tuned with all the feedback. E.g.:
More smaller PRs move faster. |
||
} | ||
|
||
// Constructs a Decimal from its constituent parts. | ||
|
@@ -338,14 +336,18 @@ void IDeserializationCallback.OnDeserialization(Object sender) | |
// Constructs a Decimal from its constituent parts. | ||
private Decimal(int lo, int mid, int hi, int flags) | ||
{ | ||
if ((flags & ~(SignMask | ScaleMask)) == 0 && (flags & ScaleMask) <= (28 << 16)) | ||
{ | ||
this.lo = lo; | ||
this.mid = mid; | ||
this.hi = hi; | ||
this.flags = flags; | ||
return; | ||
} | ||
if (!((flags & ~(SignMask | ScaleMask)) == 0 && (flags & ScaleMask) <= (28 << 16))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this intentional? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It was; but is also completely unnecessary as the function doesn't return anything - will revert |
||
ThrowInvalidDecimalBytes(); | ||
|
||
this.lo = lo; | ||
this.mid = mid; | ||
this.hi = hi; | ||
this.flags = flags; | ||
} | ||
|
||
[StackTraceHidden] | ||
private void ThrowInvalidDecimalBytes() | ||
{ | ||
throw new ArgumentException(SR.Arg_DecBitCtor); | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What does this do, prevent inlining?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Stops it showing up in exception stack trace #14652 so it looks like the throw was in-place rather than off a throw helper function