Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Improve performance for Math.Abs #15823

Merged
merged 4 commits into from Jan 11, 2018
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
98 changes: 46 additions & 52 deletions src/mscorlib/shared/System/Math.cs
Expand Up @@ -36,25 +36,61 @@ public static partial class Math
1E9, 1E10, 1E11, 1E12, 1E13, 1E14, 1E15
};

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static short Abs(short value)
{
return (value >= 0) ? value : AbsHelper(value);
if (value < 0)
{
value = (short)-value;
if (value < 0)
{
ThrowAbsOverflow();
}
}
return value;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int Abs(int value)
{
return (value >= 0) ? value : AbsHelper(value);
if (value < 0)
{
value = -value;
if (value < 0)
{
ThrowAbsOverflow();
}
}
return value;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long Abs(long value)
{
return (value >= 0) ? value : AbsHelper(value);
if (value < 0)
{
value = -value;
if (value < 0)
{
ThrowAbsOverflow();
}
}
return value;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CLSCompliant(false)]
public static sbyte Abs(sbyte value)
{
return (value >= 0) ? value : AbsHelper(value);
if (value < 0)
{
value = (sbyte)-value;
if (value < 0)
{
ThrowAbsOverflow();
}
}
return value;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
Expand All @@ -63,6 +99,12 @@ public static decimal Abs(decimal value)
return decimal.Abs(value);
}

[StackTraceHidden]
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this do, prevent inlining?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Stops it showing up in exception stack trace #14652 so it looks like the throw was in-place rather than off a throw helper function

private static void ThrowAbsOverflow()
{
throw new OverflowException(SR.Overflow_NegateTwosCompNum);
}

public static long BigMul(int a, int b)
{
return ((long)a) * b;
Expand Down Expand Up @@ -758,54 +800,6 @@ public static unsafe double Truncate(double d)
return d;
}

private static short AbsHelper(short value)
{
Debug.Assert(value < 0, "AbsHelper should only be called for negative values! (workaround for JIT inlining)");

if (value == short.MinValue)
{
throw new OverflowException(SR.Overflow_NegateTwosCompNum);
}

return ((short)(-value));
}

private static int AbsHelper(int value)
{
Debug.Assert(value < 0, "AbsHelper should only be called for negative values! (workaround for JIT inlining)");

if (value == int.MinValue)
{
throw new OverflowException(SR.Overflow_NegateTwosCompNum);
}

return -value;
}

private static long AbsHelper(long value)
{
Debug.Assert(value < 0, "AbsHelper should only be called for negative values! (workaround for JIT inlining)");

if (value == long.MinValue)
{
throw new OverflowException(SR.Overflow_NegateTwosCompNum);
}

return -value;
}

private static sbyte AbsHelper(sbyte value)
{
Debug.Assert(value < 0, "AbsHelper should only be called for negative values! (workaround for JIT inlining)");

if (value == sbyte.MinValue)
{
throw new OverflowException(SR.Overflow_NegateTwosCompNum);
}

return ((sbyte)(-value));
}

private static unsafe double copysign(double x, double y)
{
var xbits = BitConverter.DoubleToInt64Bits(x);
Expand Down
40 changes: 21 additions & 19 deletions src/mscorlib/src/System/Decimal.cs
Expand Up @@ -278,19 +278,17 @@ private void SetBits(int[] bits)
{
if (bits == null)
throw new ArgumentNullException(nameof(bits));
if (bits.Length == 4)
if (bits.Length != 4)
ThrowInvalidDecimalBytes();
int f = bits[3];
if ((f & ~(SignMask | ScaleMask)) == 0 && (f & ScaleMask) <= (28 << 16))
{
int f = bits[3];
if ((f & ~(SignMask | ScaleMask)) == 0 && (f & ScaleMask) <= (28 << 16))
{
lo = bits[0];
mid = bits[1];
hi = bits[2];
flags = f;
return;
}
lo = bits[0];
mid = bits[1];
hi = bits[2];
flags = f;
return;
}
throw new ArgumentException(SR.Arg_DecBitCtor);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where did this exception go?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would put the decimal optimization into separate PR. They are pretty unrelated to the Abs change.

Copy link
Member Author

@benaadams benaadams Jan 11, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was to improve the Math.Abs(decimal)

; Assembly listing for method Math:Abs(struct):struct
; Emitting BLENDED_CODE for X64 CPU with SSE2
; optimized code
; rsp based frame
; partially interruptible
; Final local variable assignments
;
;  V00 RetBuf       [V00,T01] (  7,  7   )   byref  ->  rcx        
;  V01 arg0         [V01,T00] (  7, 12   )   byref  ->  rdx        
;* V02 loc0         [V02    ] (  0,  0   )  struct (16) zero-ref   
;* V03 tmp1         [V03    ] (  0,  0   )  struct (16) zero-ref   
;  V04 tmp2         [V04,T02] (  4,  7   )     int  ->  rax        
;  V05 tmp3         [V05,T11] (  3,  0   )     ref  ->  rsi         class-hnd exact
;* V06 tmp4         [V06    ] (  0,  0   )     int  ->  zero-ref   
;* V07 tmp5         [V07    ] (  0,  0   )     int  ->  zero-ref   
;* V08 tmp6         [V08    ] (  0,  0   )     int  ->  zero-ref   
;  V09 tmp7         [V09,T03] (  2,  2   )     int  ->  rax         V02.flags(offs=0x00) P-INDEP
;  V10 tmp8         [V10,T04] (  2,  2   )     int  ->   r8         V02.hi(offs=0x04) P-INDEP
;  V11 tmp9         [V11,T05] (  2,  2   )     int  ->   r9         V02.lo(offs=0x08) P-INDEP
;  V12 tmp10        [V12,T06] (  2,  2   )     int  ->  rdx         V02.mid(offs=0x0c) P-INDEP
;  V13 tmp11        [V13,T07] (  2,  2   )     int  ->  rax         V03.flags(offs=0x00) P-INDEP
;  V14 tmp12        [V14,T08] (  2,  2   )     int  ->   r8         V03.hi(offs=0x04) P-INDEP
;  V15 tmp13        [V15,T09] (  2,  2   )     int  ->   r9         V03.lo(offs=0x08) P-INDEP
;  V16 tmp14        [V16,T10] (  2,  2   )     int  ->  rdx         V03.mid(offs=0x0c) P-INDEP
;  V17 tmp15        [V17,T12] (  2,  0   )     ref  ->  rcx        
;  V18 tmp16        [V18,T13] (  2,  0   )     ref  ->  rdx        
;  V19 OutArgs      [V19    ] (  1,  1   )  lclBlk (32) [rsp+0x00]  
;
; Lcl frame size = 32
G_M42161_IG01:
       push     rsi
       sub      rsp, 32
G_M42161_IG02:
       mov      eax, dword ptr [rdx]
       mov      r8d, dword ptr [rdx+4]
       mov      r9d, dword ptr [rdx+8]
       mov      edx, dword ptr [rdx+12]
       and      eax, 0xD1FFAB1E
       test     eax, 0xD1FFAB1E
       jne      G_M42161_IG04
       mov      r10d, eax
       and      r10d, 0xD1FFAB1E
       cmp      r10d, 0xD1FFAB1E
       jg       G_M42161_IG04
       mov      dword ptr [rcx], eax
       mov      dword ptr [rcx+4], r8d
       mov      dword ptr [rcx+8], r9d
       mov      dword ptr [rcx+12], edx
       mov      rax, rcx
G_M42161_IG03:
       add      rsp, 32
       pop      rsi
       ret      
************** Beginning of cold code **************
G_M42161_IG04:
       lea      rcx, [(reloc)]
       call     CORINFO_HELP_NEWSFAST
       mov      rsi, rax
       mov      ecx, 0x2648
       call     CORINFO_HELP_STRCNS_CURRENT_MODULE
       mov      rcx, rax
       xor      rdx, rdx
       call     SR:GetResourceString(ref,ref):ref
       mov      rdx, rax
       mov      rcx, rsi
       call     ArgumentException:.ctor(ref):this
       mov      rcx, rsi
       call     CORINFO_HELP_THROW
       int3     
; Total bytes of code 134, prolog size 5 for method Math:Abs(struct):struct
; ============================================================

But seems to have made it worse by making the variables do-not-enreg[X] addr-exposed

; Assembly listing for method Math:Abs(struct):struct
; Emitting BLENDED_CODE for X64 CPU with SSE2
; optimized code
; rsp based frame
; partially interruptible
; Final local variable assignments
;
;  V00 RetBuf       [V00,T01] (  7,  7   )   byref  ->  rcx        
;  V01 arg0         [V01,T00] (  7, 12   )   byref  ->  rdx        
;* V02 loc0         [V02    ] (  0,  0   )  struct (16) zero-ref   
;  V03 tmp1         [V03    ] ( 13, 24   )  struct (16) [rsp+0x28]   do-not-enreg[XS] addr-exposed
;  V04 tmp2         [V04,T02] (  4,  7   )     int  ->  rax        
;* V05 tmp3         [V05    ] (  0,  0   )     int  ->  zero-ref   
;* V06 tmp4         [V06    ] (  0,  0   )     int  ->  zero-ref   
;* V07 tmp5         [V07    ] (  0,  0   )     int  ->  zero-ref   
;  V08 tmp6         [V08,T03] (  2,  2   )     int  ->  rax         V02.flags(offs=0x00) P-INDEP
;  V09 tmp7         [V09,T04] (  2,  2   )     int  ->   r8         V02.hi(offs=0x04) P-INDEP
;  V10 tmp8         [V10,T05] (  2,  2   )     int  ->   r9         V02.lo(offs=0x08) P-INDEP
;  V11 tmp9         [V11,T06] (  2,  2   )     int  ->  rdx         V02.mid(offs=0x0c) P-INDEP
;  V12 tmp10        [V12    ] (  4,  3   )     int  ->  [rsp+0x28]   do-not-enreg[X] addr-exposed V03.flags(offs=0x00) P-DEP
;  V13 tmp11        [V13    ] (  4,  3   )     int  ->  [rsp+0x2C]   do-not-enreg[X] addr-exposed V03.hi(offs=0x04) P-DEP
;  V14 tmp12        [V14    ] (  4,  3   )     int  ->  [rsp+0x30]   do-not-enreg[X] addr-exposed V03.lo(offs=0x08) P-DEP
;  V15 tmp13        [V15    ] (  4,  3   )     int  ->  [rsp+0x34]   do-not-enreg[X] addr-exposed V03.mid(offs=0x0c) P-DEP
;  V16 OutArgs      [V16    ] (  1,  1   )  lclBlk (32) [rsp+0x00]  
;
; Lcl frame size = 56
G_M42161_IG01:
       sub      rsp, 56
       nop      
G_M42161_IG02:
       mov      eax, dword ptr [rdx]
       mov      r8d, dword ptr [rdx+4]
       mov      r9d, dword ptr [rdx+8]
       mov      edx, dword ptr [rdx+12]
       xor      r10d, r10d
       mov      dword ptr [rsp+28H], r10d
       mov      dword ptr [rsp+2CH], r10d
       mov      dword ptr [rsp+30H], r10d
       mov      dword ptr [rsp+34H], r10d
       and      eax, 0xD1FFAB1E
       test     eax, 0xD1FFAB1E
       jne      G_M42161_IG04
       mov      r10d, eax
       and      r10d, 0xD1FFAB1E
       cmp      r10d, 0xD1FFAB1E
       jg       G_M42161_IG04
       mov      dword ptr [rsp+30H], r9d
       mov      dword ptr [rsp+34H], edx
       mov      dword ptr [rsp+2CH], r8d
       mov      dword ptr [rsp+28H], eax
       mov      eax, dword ptr [rsp+28H]
       mov      dword ptr [rcx], eax
       mov      eax, dword ptr [rsp+2CH]
       mov      dword ptr [rcx+4], eax
       mov      eax, dword ptr [rsp+30H]
       mov      dword ptr [rcx+8], eax
       mov      eax, dword ptr [rsp+34H]
       mov      dword ptr [rcx+12], eax
       mov      rax, rcx
G_M42161_IG03:
       add      rsp, 56
       ret      
************** Beginning of cold code **************
G_M42161_IG04:
       lea      rcx, bword ptr [rsp+28H]
       call     Decimal:ThrowInvalidDecimalBytes():this
       int3     
; Total bytes of code 144, prolog size 5 for method Math:Abs(struct):struct

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really sure why, but will revert

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would expect the decimal improvement will grow once it is fine tuned with all the feedback. E.g.:

  • I think that Decimal Abs can assume that the in-comming decimal is valid. It is what all other Decimal ops do. I do not think it needs to throw at all.
  • There seemed to be a bug introduced in SetBits by your change - it was not throwing on invalid flags anymore.
  • SetBits can be folded into constructor that takes byte[]. It does not look like an improvement for it to be separate.

More smaller PRs move faster.

}

// Constructs a Decimal from its constituent parts.
Expand Down Expand Up @@ -338,14 +336,18 @@ void IDeserializationCallback.OnDeserialization(Object sender)
// Constructs a Decimal from its constituent parts.
private Decimal(int lo, int mid, int hi, int flags)
{
if ((flags & ~(SignMask | ScaleMask)) == 0 && (flags & ScaleMask) <= (28 << 16))
{
this.lo = lo;
this.mid = mid;
this.hi = hi;
this.flags = flags;
return;
}
if (!((flags & ~(SignMask | ScaleMask)) == 0 && (flags & ScaleMask) <= (28 << 16)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this intentional?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was; but is also completely unnecessary as the function doesn't return anything - will revert

ThrowInvalidDecimalBytes();

this.lo = lo;
this.mid = mid;
this.hi = hi;
this.flags = flags;
}

[StackTraceHidden]
private void ThrowInvalidDecimalBytes()
{
throw new ArgumentException(SR.Arg_DecBitCtor);
}

Expand Down