New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RyuJIT spills 16 bit value but reloads as 32 bits in ARM32/x86 in release #18867

Open
jakobbotsch opened this Issue Jul 11, 2018 · 14 comments

Comments

Projects
None yet
3 participants
@jakobbotsch
Collaborator

jakobbotsch commented Jul 11, 2018

For ARM32, the example is:

interface IRT { void WriteLine<T>(T val); }
class CRT : IRT { public void WriteLine<T>(T val) => System.Console.WriteLine(val); }
public class Program
{
    static IRT s_rt;
    static byte[] s_1 = new byte[] { 0 };
    static int s_3;
    static ushort[] s_8 = new ushort[] { 1 };

    public static void Main()
    {
        s_rt = new CRT();
        M11(s_8, 0, 0, 0, true, s_1);
    }

    static ushort M11(ushort[] arg0, ushort arg1, ushort arg3, byte arg4, bool arg7, byte[] arg10)
    {
        if (arg7)
        {
            ulong var4 = (ulong)s_3;
            arg3 = s_8[0];
            ushort var5 = arg3;
            s_rt.WriteLine(var4);
            s_rt.WriteLine((int)var5); // prints large negative value on my machine
        }
		
        if (!arg7)
        {
            var vr7 = arg0[0];
        }

        arg10[0] = arg4;
        return arg1;
    }
}

This does not repro on x64. With AltJit I get the following disassembly, which shows (int)var5 loading the argument as 32 bits:

; Assembly listing for method Program:M11(ref,ushort,ushort,ubyte,bool,ref):ushort
; Emitting BLENDED_CODE for generic ARM CPU
; optimized code
; r11 based frame
; partially interruptible
; Final local variable assignments
;
;  V00 arg0         [V00,T00] (  5,  3.50)     ref  ->   r4         class-hnd
;  V01 arg1         [V01,T05] (  3,  3   )  ushort  ->   r7
;  V02 arg2         [V02,T04] (  5,  3.50)  ushort  ->  [sp+0x0C]
;  V03 arg3         [V03,T03] (  4,  4   )   ubyte  ->   r5
;  V04 arg4         [V04,T10] (  2,  2   )    bool  ->   r0
;  V05 arg5         [V05,T06] (  4,  4   )     ref  ->   r8         class-hnd
;* V06 loc0         [V06    ] (  0,  0   )    long  ->  zero-ref
;* V07 loc1         [V07    ] (  0,  0   )  ushort  ->  zero-ref
;# V08 OutArgs      [V08    ] (  1,  1   )  lclBlk ( 0) [sp+0x00]
;  V09 tmp1         [V09,T07] (  3,  3   )     ref  ->  [sp+0x00]
;  V10 tmp2         [V10,T11] (  2,  2   )     int  ->  [sp+0x08]
;  V11 tmp3         [V11,T08] (  3,  3   )     ref  ->   r6
;  V12 tmp4         [V12,T12] (  2,  2   )     int  ->  [sp+0x04]
;  V13 tmp5         [V13,T01] (  6,  6   )     ref  ->   r0
;  V14 cse0         [V14,T02] (  6,  6   )     int  ->   r9
;  V15 rat0         [V15,T13] (  2,  1   )     int  ->   r6         V06.lo(offs=0x00)
;  V16 rat1         [V16,T14] (  2,  1   )     int  ->  r10         V06.hi(offs=0x04)
;  V17 rat2         [V17,T09] (  3,  3   )     int  ->   r0
;
; Lcl frame size = 20

G_M58754_IG01:
000000  E92D 4FF0      push    {r4,r5,r6,r7,r8,r9,r10,r11,lr}
000004  B085           sub     sp, 20
000006  F10D 0B30      add     r11, sp, 48
00000A  4604           mov     r4, r0
00000C  460F           mov     r7, r1
00000E  4616           mov     r6, r2
000010  461D           mov     r5, r3
000012  980E           ldr     r0, [sp+0x38]    // [V04 arg4]
000014  F8DD 803C      ldr     r8, [sp+0x3c]    // [V05 arg5]

G_M58754_IG02:
000018  FA5F F980      uxtb    r9, r0
00001C  F1B9 0F00      cmp     r9, 0
000020  D047           beq     SHORT G_M58754_IG03
000022  F244 008C      movw    r0, 0x408c
000026  F2C0 00EF      movt    r0, 0xef
00002A  6800           ldr     r0, [r0]
00002C  EA4F 7AE0      asr     r10, r0, 31
000030  4606           mov     r6, r0
000032  F641 40B4      movw    r0, 0x1cb4
000036  F2C0 6024      movt    r0, 0x624
00003A  6800           ldr     r0, [r0]
00003C  2100           movs    r1, 0
00003E  6842           ldr     r2, [r0+4]
000040  4291           cmp     r1, r2
000042  D248           bhs     SHORT G_M58754_IG06
000044  8900           ldrh    r0, [r0+8]
000046  F8AD 000C      strh    r0, [sp+0x0c]    // [V02 arg2]  <------------------ store as 16 bits
00004A  F641 40AC      movw    r0, 0x1cac
00004E  F2C0 6024      movt    r0, 0x624
000052  F8D0 E000      ldr     lr, [r0]
000056  F8CD E000      str     lr, [sp] // [V09 tmp1]
00005A  4670           mov     r0, lr
00005C  F644 1194      movw    r1, 0x4994
000060  F2C0 01EF      movt    r1, 0xef
000064  F644 3210      movw    r2, 0x4b10
000068  F2C0 02EF      movt    r2, 0xef
00006C  F245 5CA0      movw    r12, 0x55a0
000070  F6C0 7CCB      movt    r12, 0xfcb
000074  47E0           blx     r12              // CORINFO_HELP_VIRTUAL_FUNC_PTR
000076  9002           str     r0, [sp+0x08]    // [V10 tmp2]
000078  9800           ldr     r0, [sp] // [V09 tmp1]
00007A  4632           mov     r2, r6
00007C  4653           mov     r3, r10
00007E  9902           ldr     r1, [sp+0x08]    // [V10 tmp2]
000080  4788           blx     r1
000082  F641 40AC      movw    r0, 0x1cac
000086  F2C0 6024      movt    r0, 0x624
00008A  6806           ldr     r6, [r0]
00008C  4630           mov     r0, r6
00008E  F644 1194      movw    r1, 0x4994
000092  F2C0 01EF      movt    r1, 0xef
000096  F644 3274      movw    r2, 0x4b74
00009A  F2C0 02EF      movt    r2, 0xef
00009E  F245 53A0      movw    r3, 0x55a0
0000A2  F6C0 73CB      movt    r3, 0xfcb
0000A6  4798           blx     r3               // CORINFO_HELP_VIRTUAL_FUNC_PTR
0000A8  9001           str     r0, [sp+0x04]    // [V12 tmp4]
0000AA  4630           mov     r0, r6
0000AC  9903           ldr     r1, [sp+0x0c]    // [V02 arg2]   <------------------ load as 32 bits
0000AE  9B01           ldr     r3, [sp+0x04]    // [V12 tmp4]
0000B0  4798           blx     r3

G_M58754_IG03:
0000B2  F1B9 0F00      cmp     r9, 0
0000B6  D103           bne     SHORT G_M58754_IG04
0000B8  2000           movs    r0, 0
0000BA  6863           ldr     r3, [r4+4]
0000BC  4298           cmp     r0, r3
0000BE  D20A           bhs     SHORT G_M58754_IG06

G_M58754_IG04:
0000C0  2000           movs    r0, 0
0000C2  F8D8 3004      ldr     r3, [r8+4]
0000C6  4298           cmp     r0, r3
0000C8  D205           bhs     SHORT G_M58754_IG06
0000CA  F888 5008      strb    r5, [r8+8]
0000CE  B2B8           uxth    r0, r7

G_M58754_IG05:
0000D0  B005           add     sp, 20
0000D2  E8BD 8FF0      pop     {r4,r5,r6,r7,r8,r9,r10,r11,pc}

G_M58754_IG06:
0000D6  F647 33B0      movw    r3, 0x7bb0
0000DA  F6C0 73CB      movt    r3, 0xfcb
0000DE  4798           blx     r3               // CORINFO_HELP_RNGCHKFAIL
0000E0  DEFE           bkpt

; Total bytes of code 226, prolog size 10 for method Program:M11(ref,ushort,ushort,ubyte,bool,ref):ushort
; ============================================================

It seems most of the example is only necessary to ensure that arg3 get spilled to the stack. Without the spill, the example runs correctly.

AltJit dump: https://gist.github.com/jakobbotsch/6ebc59af5e0a90df73c8c109a58bd301

EDIT: With the help of @mikedn, here is an adapted example which shows the same problem on x86. On my machine this prints the result correctly, but the disassembly shows that it potentially passes garbage to the WriteLine call:

interface IRT { void WriteLine<T>(T val); }
class CRT : IRT { public void WriteLine<T>(T val) => System.Console.WriteLine(val); }
public class Program
{
    static IRT s_rt;
    static byte[] s_1 = new byte[] { 0 };
    static int s_3;
    static ushort[] s_8 = new ushort[] { 1 };

    public static void Main()
    {
        s_rt = new CRT();
        M11(0, s_8, 0, 0, true, s_1);
    }

    static ushort M11(ushort arg3, ushort[] arg0, ushort arg1, byte arg4, bool arg7, byte[] arg10)
    {
        arg1 += arg4;
        arg1 += (byte)(arg7 ? 1 : 0);
        arg1 += arg10[0];
        arg4 += arg10[0];
        if (arg7)
        {
            ulong var4 = (ulong)s_3;
            arg3 = s_8[0];
            ushort var5 = arg3;
            s_rt.WriteLine(var4);
            s_rt.WriteLine((int)var5); // potential garbage passed in upper half
        }
		
        if (!arg7)
        {
            var vr7 = arg0[0];
        }

        arg10[0] = arg4;
        return arg1;
    }
}
@mikedn

This comment has been minimized.

Show comment
Hide comment
@mikedn

mikedn Jul 11, 2018

Contributor

spills 16 bit value but reloads as 32 bits in ARM32

It looks like this can also happen on x86:

            ulong var4 = (ulong)s_3;
            arg3 += s_8[0]; // make it += instead of =
            ushort var5 = arg3;
            s_rt.WriteLine(var4);
            s_rt.WriteLine((int)var5); // prints large negative value on my machine

generates

       0FB77608     movzx    esi, word  ptr [esi+8]
       03D6         add      edx, esi
       0FB7D2       movzx    edx, dx
       66895514     mov      word  ptr [ebp+14H], dx
       8B35AC1CC005 mov      esi, gword ptr [classVar[0x8c4108]]
       6880448C00   push     0x8C4480
       8BCE         mov      ecx, esi
       BA04438C00   mov      edx, 0x8C4304
       E86BBFDF0E   call     CORINFO_HELP_VIRTUAL_FUNC_PTR
       53           push     ebx
       57           push     edi
       8BCE         mov      ecx, esi
       FFD0         call     eax
       8B35AC1CC005 mov      esi, gword ptr [classVar[0x8c4108]]
       68E4448C00   push     0x8C44E4
       8BCE         mov      ecx, esi
       BA04438C00   mov      edx, 0x8C4304
       E84EBFDF0E   call     CORINFO_HELP_VIRTUAL_FUNC_PTR
       8BCE         mov      ecx, esi
       8B5514       mov      edx, dword ptr [ebp+14H]
       FFD0         call     eax

The difference is that on x86 it is very unlikely that the upper bytes are garbage. On x86 arg3 is passed on the stack so its associated stack location is setup by the caller, usually with an integer push. And the value pushed will normally be appropriately zero extended.

However, on ARM32 arg3 is passed in a register and its associated stack location is a normal local variable. That local variable is never written to before arg3 = s_8[0] so it will contain stack garbage. And then strh stores only 2 bytes so the upper bytes remain garbage.

I'm not sure what exactly is bugged. It could be that "normalize on store" is not set correctly on ARM32. Though the x86 behavior, while it happens to work, it's kind of weird as well:

       03D6         add      edx, esi
; so why bother with this if we store only a word?
       0FB7D2       movzx    edx, dx
; or why bother with a word store (requires 66h prefix) if you already zero extended the value?
       66895514     mov      word  ptr [ebp+14H], dx
Contributor

mikedn commented Jul 11, 2018

spills 16 bit value but reloads as 32 bits in ARM32

It looks like this can also happen on x86:

            ulong var4 = (ulong)s_3;
            arg3 += s_8[0]; // make it += instead of =
            ushort var5 = arg3;
            s_rt.WriteLine(var4);
            s_rt.WriteLine((int)var5); // prints large negative value on my machine

generates

       0FB77608     movzx    esi, word  ptr [esi+8]
       03D6         add      edx, esi
       0FB7D2       movzx    edx, dx
       66895514     mov      word  ptr [ebp+14H], dx
       8B35AC1CC005 mov      esi, gword ptr [classVar[0x8c4108]]
       6880448C00   push     0x8C4480
       8BCE         mov      ecx, esi
       BA04438C00   mov      edx, 0x8C4304
       E86BBFDF0E   call     CORINFO_HELP_VIRTUAL_FUNC_PTR
       53           push     ebx
       57           push     edi
       8BCE         mov      ecx, esi
       FFD0         call     eax
       8B35AC1CC005 mov      esi, gword ptr [classVar[0x8c4108]]
       68E4448C00   push     0x8C44E4
       8BCE         mov      ecx, esi
       BA04438C00   mov      edx, 0x8C4304
       E84EBFDF0E   call     CORINFO_HELP_VIRTUAL_FUNC_PTR
       8BCE         mov      ecx, esi
       8B5514       mov      edx, dword ptr [ebp+14H]
       FFD0         call     eax

The difference is that on x86 it is very unlikely that the upper bytes are garbage. On x86 arg3 is passed on the stack so its associated stack location is setup by the caller, usually with an integer push. And the value pushed will normally be appropriately zero extended.

However, on ARM32 arg3 is passed in a register and its associated stack location is a normal local variable. That local variable is never written to before arg3 = s_8[0] so it will contain stack garbage. And then strh stores only 2 bytes so the upper bytes remain garbage.

I'm not sure what exactly is bugged. It could be that "normalize on store" is not set correctly on ARM32. Though the x86 behavior, while it happens to work, it's kind of weird as well:

       03D6         add      edx, esi
; so why bother with this if we store only a word?
       0FB7D2       movzx    edx, dx
; or why bother with a word store (requires 66h prefix) if you already zero extended the value?
       66895514     mov      word  ptr [ebp+14H], dx
@jakobbotsch

This comment has been minimized.

Show comment
Hide comment
@jakobbotsch

jakobbotsch Jul 11, 2018

Collaborator

I haven't checked exactly the calling convention used by the JIT, but it looks like __fastcall. If so shouldn't we be able to see the same behavior if we pass a 16-bit value as one of the first two arguments and the JIT ends up spilling it to the stack?

Collaborator

jakobbotsch commented Jul 11, 2018

I haven't checked exactly the calling convention used by the JIT, but it looks like __fastcall. If so shouldn't we be able to see the same behavior if we pass a 16-bit value as one of the first two arguments and the JIT ends up spilling it to the stack?

@mikedn

This comment has been minimized.

Show comment
Hide comment
@mikedn

mikedn Jul 11, 2018

Contributor

If so shouldn't we be able to see the same behavior if we pass a 16-bit value as one of the first two arguments and the JIT ends up spilling it to the stack?

Yep, that's something I've yet to check.

Contributor

mikedn commented Jul 11, 2018

If so shouldn't we be able to see the same behavior if we pass a 16-bit value as one of the first two arguments and the JIT ends up spilling it to the stack?

Yep, that's something I've yet to check.

@jakobbotsch

This comment has been minimized.

Show comment
Hide comment
@jakobbotsch

jakobbotsch Jul 11, 2018

Collaborator

Hmm nope. Here that happens:

interface IRT { void WriteLine<T>(T val); }
class CRT : IRT { public void WriteLine<T>(T val) => System.Console.WriteLine(val); }
public class Program
{
    static IRT s_rt;
    static byte[] s_1 = new byte[] { 0 };
    static int s_3;
    static ushort[] s_8 = new ushort[] { 1 };

    public static void Main()
    {
        s_rt = new CRT();
        M11(0, s_8, 0, 0, true, s_1);
    }

    static ushort M11(ushort arg3, ushort[] arg0, ushort arg1, byte arg4, bool arg7, byte[] arg10)
    {
        arg1 += arg4;
        arg1 += (byte)(arg7 ? 1 : 0);
        arg1 += arg10[0];
        arg4 += arg10[0];
        if (arg7)
        {
            ulong var4 = (ulong)s_3;
            arg3 += s_8[0];
            ushort var5 = arg3;
            s_rt.WriteLine(var4);
            s_rt.WriteLine((int)var5); // prints large negative value on my machine
        }
		
        if (!arg7)
        {
            var vr7 = arg0[0];
        }

        arg10[0] = arg4;
        return arg1;
    }
}

But the JIT does a wide store to initialize the stack slot:

; Assembly listing for method Program:M11(ushort,ref,ushort,ubyte,bool,ref):ushort
; Emitting BLENDED_CODE for generic X86 CPU
; optimized code
; ebp based frame
; partially interruptible
; Final local variable assignments
;
;  V00 arg0         [V00,T06] (  7,  4.50)  ushort  ->  [ebp-0x10]
;  V01 arg1         [V01,T07] (  5,  3.50)     ref  ->  [ebp-0x20]   class-hnd
;  V02 arg2         [V02,T00] ( 15, 15   )  ushort  ->  esi
;  V03 arg3         [V03,T03] (  9,  9   )   ubyte  ->  ebx
;  V04 arg4         [V04,T16] (  2,  2   )    bool  ->  ecx
;  V05 arg5         [V05,T01] ( 11, 11   )     ref  ->  edi         class-hnd
;* V06 loc0         [V06    ] (  0,  0   )    long  ->  zero-ref
;* V07 loc1         [V07    ] (  0,  0   )  ushort  ->  zero-ref
;  V08 tmp0         [V08,T13] (  3,  2   )     int  ->  esi
;  V09 tmp1         [V09,T14] (  3,  2   )     int  ->  esi
;  V10 tmp2         [V10,T15] (  3,  2   )     int  ->  edx
;  V11 tmp3         [V11,T10] (  3,  3   )     ref  ->  [ebp-0x24]
;  V12 tmp4         [V12,T17] (  2,  2   )     int  ->  eax
;  V13 tmp5         [V13,T11] (  3,  3   )     ref  ->  [ebp-0x28]
;  V14 tmp6         [V14,T18] (  2,  2   )     int  ->  eax
;  V15 tmp7         [V15,T08] (  6,  6   )     ref  ->  edx
;  V16 cse0         [V16,T02] ( 11, 11   )     int  ->  edx
;  V17 cse1         [V17,T05] (  7,  7   )     int  ->  edx
;  V18 cse2         [V18,T04] (  8,  8   )     int  ->  [ebp-0x14]
;  V19 cse3         [V19,T09] (  6,  6   )     int  ->  ebx
;  V20 rat0         [V20,T19] (  2,  1   )     int  ->  [ebp-0x18]   V06.lo(offs=0x00)
;  V21 rat1         [V21,T20] (  2,  1   )     int  ->  [ebp-0x1C]   V06.hi(offs=0x04)
;  V22 rat2         [V22,T12] (  3,  3   )     int  ->  edx
;
; Lcl frame size = 28

G_M58754_IG01:
       55           push     ebp
       8BEC         mov      ebp, esp
       57           push     edi
       56           push     esi
       53           push     ebx
       83EC1C       sub      esp, 28
       894DF0       mov      dword ptr [ebp-10H], ecx     ; wide initialize
       8955E0       mov      gword ptr [ebp-20H], edx
       8B7514       mov      esi, dword ptr [ebp+14H]
       8B5D10       mov      ebx, dword ptr [ebp+10H]
       8B4D0C       mov      ecx, dword ptr [ebp+0CH]
       8B7D08       mov      edi, gword ptr [ebp+08H]

G_M58754_IG02:
       0FB7F6       movzx    esi, si
       0FB6DB       movzx    ebx, bl
       03F3         add      esi, ebx
       0FB7F6       movzx    esi, si
       0FB6C9       movzx    ecx, cl
       85C9         test     ecx, ecx
       7504         jne      SHORT G_M58754_IG03
       33D2         xor      edx, edx
       EB05         jmp      SHORT G_M58754_IG04

G_M58754_IG03:
       BA01000000   mov      edx, 1

G_M58754_IG04:
       0FB6D2       movzx    edx, dl
       03F2         add      esi, edx
       0FB7F6       movzx    esi, si
       8B5704       mov      edx, dword ptr [edi+4]
       83FA00       cmp      edx, 0
       0F86AB000000 jbe      G_M58754_IG08
       0FB65708     movzx    edx, byte  ptr [edi+8]
       03F2         add      esi, edx
       0FB7F6       movzx    esi, si
       03DA         add      ebx, edx
       0FB6DB       movzx    ebx, bl
       894DEC       mov      dword ptr [ebp-14H], ecx
       85C9         test     ecx, ecx
       7476         je       SHORT G_M58754_IG05
       8B158C40E402 mov      edx, dword ptr [classVar[0x2e447f8]]
       8BC2         mov      eax, edx
       C1F81F       sar      eax, 31
       8955E8       mov      dword ptr [ebp-18H], edx
       8945E4       mov      dword ptr [ebp-1CH], eax
       8B45F0       mov      eax, dword ptr [ebp-10H]
       0FB7C0       movzx    eax, ax
       8B15B41C4206 mov      edx, gword ptr [classVar[0x2e44808]]
       837A0400     cmp      dword ptr [edx+4], 0
       7673         jbe      SHORT G_M58754_IG08
       0FB75208     movzx    edx, word  ptr [edx+8]
       03C2         add      eax, edx
       0FB7C0       movzx    eax, ax
       668945F0     mov      word  ptr [ebp-10H], ax         ; still a narrow store
       8B15AC1C4206 mov      edx, gword ptr [classVar[0x2e447d8]]
       68104BE402   push     0x2E44B10
       8955DC       mov      gword ptr [ebp-24H], edx
       8BCA         mov      ecx, edx
       BA9449E402   mov      edx, 0x2E44994
       E87732110B   call     CORINFO_HELP_VIRTUAL_FUNC_PTR
       FF75E4       push     dword ptr [ebp-1CH]
       FF75E8       push     dword ptr [ebp-18H]
       8B4DDC       mov      ecx, gword ptr [ebp-24H]
       FFD0         call     eax
       A1AC1C4206   mov      eax, gword ptr [classVar[0x2e447d8]]
       68744BE402   push     0x2E44B74
       8945D8       mov      gword ptr [ebp-28H], eax
       8BC8         mov      ecx, eax
       BA9449E402   mov      edx, 0x2E44994
       E85332110B   call     CORINFO_HELP_VIRTUAL_FUNC_PTR
       8B4DD8       mov      ecx, gword ptr [ebp-28H]
       8B55F0       mov      edx, dword ptr [ebp-10H]    ; and still a wide load
       FFD0         call     eax

G_M58754_IG05:
       8B4DEC       mov      ecx, dword ptr [ebp-14H]
       85C9         test     ecx, ecx
       7509         jne      SHORT G_M58754_IG06
       8B45E0       mov      eax, gword ptr [ebp-20H]
       83780400     cmp      dword ptr [eax+4], 0
       7610         jbe      SHORT G_M58754_IG08

G_M58754_IG06:
       885F08       mov      byte  ptr [edi+8], bl
       0FB7C6       movzx    eax, si

G_M58754_IG07:
       8D65F4       lea      esp, [ebp-0CH]
       5B           pop      ebx
       5E           pop      esi
       5F           pop      edi
       5D           pop      ebp
       C21000       ret      16

G_M58754_IG08:
       E83658110B   call     CORINFO_HELP_RNGCHKFAIL
       CC           int3

; Total bytes of code 251, prolog size 9 for method Program:M11(ushort,ref,ushort,ubyte,bool,ref):ushort
; ============================================================
Collaborator

jakobbotsch commented Jul 11, 2018

Hmm nope. Here that happens:

interface IRT { void WriteLine<T>(T val); }
class CRT : IRT { public void WriteLine<T>(T val) => System.Console.WriteLine(val); }
public class Program
{
    static IRT s_rt;
    static byte[] s_1 = new byte[] { 0 };
    static int s_3;
    static ushort[] s_8 = new ushort[] { 1 };

    public static void Main()
    {
        s_rt = new CRT();
        M11(0, s_8, 0, 0, true, s_1);
    }

    static ushort M11(ushort arg3, ushort[] arg0, ushort arg1, byte arg4, bool arg7, byte[] arg10)
    {
        arg1 += arg4;
        arg1 += (byte)(arg7 ? 1 : 0);
        arg1 += arg10[0];
        arg4 += arg10[0];
        if (arg7)
        {
            ulong var4 = (ulong)s_3;
            arg3 += s_8[0];
            ushort var5 = arg3;
            s_rt.WriteLine(var4);
            s_rt.WriteLine((int)var5); // prints large negative value on my machine
        }
		
        if (!arg7)
        {
            var vr7 = arg0[0];
        }

        arg10[0] = arg4;
        return arg1;
    }
}

But the JIT does a wide store to initialize the stack slot:

; Assembly listing for method Program:M11(ushort,ref,ushort,ubyte,bool,ref):ushort
; Emitting BLENDED_CODE for generic X86 CPU
; optimized code
; ebp based frame
; partially interruptible
; Final local variable assignments
;
;  V00 arg0         [V00,T06] (  7,  4.50)  ushort  ->  [ebp-0x10]
;  V01 arg1         [V01,T07] (  5,  3.50)     ref  ->  [ebp-0x20]   class-hnd
;  V02 arg2         [V02,T00] ( 15, 15   )  ushort  ->  esi
;  V03 arg3         [V03,T03] (  9,  9   )   ubyte  ->  ebx
;  V04 arg4         [V04,T16] (  2,  2   )    bool  ->  ecx
;  V05 arg5         [V05,T01] ( 11, 11   )     ref  ->  edi         class-hnd
;* V06 loc0         [V06    ] (  0,  0   )    long  ->  zero-ref
;* V07 loc1         [V07    ] (  0,  0   )  ushort  ->  zero-ref
;  V08 tmp0         [V08,T13] (  3,  2   )     int  ->  esi
;  V09 tmp1         [V09,T14] (  3,  2   )     int  ->  esi
;  V10 tmp2         [V10,T15] (  3,  2   )     int  ->  edx
;  V11 tmp3         [V11,T10] (  3,  3   )     ref  ->  [ebp-0x24]
;  V12 tmp4         [V12,T17] (  2,  2   )     int  ->  eax
;  V13 tmp5         [V13,T11] (  3,  3   )     ref  ->  [ebp-0x28]
;  V14 tmp6         [V14,T18] (  2,  2   )     int  ->  eax
;  V15 tmp7         [V15,T08] (  6,  6   )     ref  ->  edx
;  V16 cse0         [V16,T02] ( 11, 11   )     int  ->  edx
;  V17 cse1         [V17,T05] (  7,  7   )     int  ->  edx
;  V18 cse2         [V18,T04] (  8,  8   )     int  ->  [ebp-0x14]
;  V19 cse3         [V19,T09] (  6,  6   )     int  ->  ebx
;  V20 rat0         [V20,T19] (  2,  1   )     int  ->  [ebp-0x18]   V06.lo(offs=0x00)
;  V21 rat1         [V21,T20] (  2,  1   )     int  ->  [ebp-0x1C]   V06.hi(offs=0x04)
;  V22 rat2         [V22,T12] (  3,  3   )     int  ->  edx
;
; Lcl frame size = 28

G_M58754_IG01:
       55           push     ebp
       8BEC         mov      ebp, esp
       57           push     edi
       56           push     esi
       53           push     ebx
       83EC1C       sub      esp, 28
       894DF0       mov      dword ptr [ebp-10H], ecx     ; wide initialize
       8955E0       mov      gword ptr [ebp-20H], edx
       8B7514       mov      esi, dword ptr [ebp+14H]
       8B5D10       mov      ebx, dword ptr [ebp+10H]
       8B4D0C       mov      ecx, dword ptr [ebp+0CH]
       8B7D08       mov      edi, gword ptr [ebp+08H]

G_M58754_IG02:
       0FB7F6       movzx    esi, si
       0FB6DB       movzx    ebx, bl
       03F3         add      esi, ebx
       0FB7F6       movzx    esi, si
       0FB6C9       movzx    ecx, cl
       85C9         test     ecx, ecx
       7504         jne      SHORT G_M58754_IG03
       33D2         xor      edx, edx
       EB05         jmp      SHORT G_M58754_IG04

G_M58754_IG03:
       BA01000000   mov      edx, 1

G_M58754_IG04:
       0FB6D2       movzx    edx, dl
       03F2         add      esi, edx
       0FB7F6       movzx    esi, si
       8B5704       mov      edx, dword ptr [edi+4]
       83FA00       cmp      edx, 0
       0F86AB000000 jbe      G_M58754_IG08
       0FB65708     movzx    edx, byte  ptr [edi+8]
       03F2         add      esi, edx
       0FB7F6       movzx    esi, si
       03DA         add      ebx, edx
       0FB6DB       movzx    ebx, bl
       894DEC       mov      dword ptr [ebp-14H], ecx
       85C9         test     ecx, ecx
       7476         je       SHORT G_M58754_IG05
       8B158C40E402 mov      edx, dword ptr [classVar[0x2e447f8]]
       8BC2         mov      eax, edx
       C1F81F       sar      eax, 31
       8955E8       mov      dword ptr [ebp-18H], edx
       8945E4       mov      dword ptr [ebp-1CH], eax
       8B45F0       mov      eax, dword ptr [ebp-10H]
       0FB7C0       movzx    eax, ax
       8B15B41C4206 mov      edx, gword ptr [classVar[0x2e44808]]
       837A0400     cmp      dword ptr [edx+4], 0
       7673         jbe      SHORT G_M58754_IG08
       0FB75208     movzx    edx, word  ptr [edx+8]
       03C2         add      eax, edx
       0FB7C0       movzx    eax, ax
       668945F0     mov      word  ptr [ebp-10H], ax         ; still a narrow store
       8B15AC1C4206 mov      edx, gword ptr [classVar[0x2e447d8]]
       68104BE402   push     0x2E44B10
       8955DC       mov      gword ptr [ebp-24H], edx
       8BCA         mov      ecx, edx
       BA9449E402   mov      edx, 0x2E44994
       E87732110B   call     CORINFO_HELP_VIRTUAL_FUNC_PTR
       FF75E4       push     dword ptr [ebp-1CH]
       FF75E8       push     dword ptr [ebp-18H]
       8B4DDC       mov      ecx, gword ptr [ebp-24H]
       FFD0         call     eax
       A1AC1C4206   mov      eax, gword ptr [classVar[0x2e447d8]]
       68744BE402   push     0x2E44B74
       8945D8       mov      gword ptr [ebp-28H], eax
       8BC8         mov      ecx, eax
       BA9449E402   mov      edx, 0x2E44994
       E85332110B   call     CORINFO_HELP_VIRTUAL_FUNC_PTR
       8B4DD8       mov      ecx, gword ptr [ebp-28H]
       8B55F0       mov      edx, dword ptr [ebp-10H]    ; and still a wide load
       FFD0         call     eax

G_M58754_IG05:
       8B4DEC       mov      ecx, dword ptr [ebp-14H]
       85C9         test     ecx, ecx
       7509         jne      SHORT G_M58754_IG06
       8B45E0       mov      eax, gword ptr [ebp-20H]
       83780400     cmp      dword ptr [eax+4], 0
       7610         jbe      SHORT G_M58754_IG08

G_M58754_IG06:
       885F08       mov      byte  ptr [edi+8], bl
       0FB7C6       movzx    eax, si

G_M58754_IG07:
       8D65F4       lea      esp, [ebp-0CH]
       5B           pop      ebx
       5E           pop      esi
       5F           pop      edi
       5D           pop      ebp
       C21000       ret      16

G_M58754_IG08:
       E83658110B   call     CORINFO_HELP_RNGCHKFAIL
       CC           int3

; Total bytes of code 251, prolog size 9 for method Program:M11(ushort,ref,ushort,ubyte,bool,ref):ushort
; ============================================================
@mikedn

This comment has been minimized.

Show comment
Hide comment
@mikedn

mikedn Jul 11, 2018

Contributor

But the JIT does a wide store to initialize the stack slot:

Yeah, for this to "work" on x86 we need to find a way to get it to store to a stack location without using +=, as doing that introduces a load from arg3. Trouble is, with the original code the variable gets enregistered on x86.

Anyway, I noticed that the original code involves some copy propagation performed during morph. This results in arg3 being used to call WriteLine instead of var5. This is another possible cause for this bug - these 2 variables have the same type but one is an argument and the other one is not. There are differences in the way method arguments are handled and it could be that copy propagation doesn't get things right.

Contributor

mikedn commented Jul 11, 2018

But the JIT does a wide store to initialize the stack slot:

Yeah, for this to "work" on x86 we need to find a way to get it to store to a stack location without using +=, as doing that introduces a load from arg3. Trouble is, with the original code the variable gets enregistered on x86.

Anyway, I noticed that the original code involves some copy propagation performed during morph. This results in arg3 being used to call WriteLine instead of var5. This is another possible cause for this bug - these 2 variables have the same type but one is an argument and the other one is not. There are differences in the way method arguments are handled and it could be that copy propagation doesn't get things right.

@jakobbotsch

This comment has been minimized.

Show comment
Hide comment
@jakobbotsch

jakobbotsch Jul 11, 2018

Collaborator

Yeah, for this to "work" on x86 we need to find a way to get it to store to a stack location without using +=, as doing that introduces a load from arg3. Trouble is, with the original code the variable gets enregistered on x86.

Ah, that was the missing link. My previous example gives the bad code-gen on x86 when removing that.

Collaborator

jakobbotsch commented Jul 11, 2018

Yeah, for this to "work" on x86 we need to find a way to get it to store to a stack location without using +=, as doing that introduces a load from arg3. Trouble is, with the original code the variable gets enregistered on x86.

Ah, that was the missing link. My previous example gives the bad code-gen on x86 when removing that.

@RussKeldorph RussKeldorph added this to the 3.0 milestone Jul 11, 2018

@RussKeldorph

This comment has been minimized.

Show comment
Hide comment
@RussKeldorph
Member

RussKeldorph commented Jul 11, 2018

@jakobbotsch jakobbotsch changed the title from RyuJIT spills 16 bit value but reloads as 32 bits in ARM32 in release to RyuJIT spills 16 bit value but reloads as 32 bits in ARM32/x86 in release Jul 11, 2018

@mikedn

This comment has been minimized.

Show comment
Hide comment
@mikedn

mikedn Jul 12, 2018

Contributor

This is a bug in copy propagation performed by local assertion propagation during morph. It does not account for variables having different "normalize on load" settings and it substitutes one "normal" lclvar (having "normalize on load" false) with an argument (having "normalize on load" true). Normalize on load requires a separate cast after the lclvar load and that cast is generated during global morph.

Local assertion propagation needs to either block copy propagation in such cases or perform a re-morph. The former is likely safer and it's also what VN based copy propagation does (it rejects lclvar that are parameters).

Contributor

mikedn commented Jul 12, 2018

This is a bug in copy propagation performed by local assertion propagation during morph. It does not account for variables having different "normalize on load" settings and it substitutes one "normal" lclvar (having "normalize on load" false) with an argument (having "normalize on load" true). Normalize on load requires a separate cast after the lclvar load and that cast is generated during global morph.

Local assertion propagation needs to either block copy propagation in such cases or perform a re-morph. The former is likely safer and it's also what VN based copy propagation does (it rejects lclvar that are parameters).

@mikedn

This comment has been minimized.

Show comment
Hide comment
@mikedn

mikedn Jul 14, 2018

Contributor

Local assertion propagation needs to either block copy propagation in such cases or perform a re-morph.

Or local assertion propagation shouldn't even generate an assertion in this case, it already does not generate an assertion if lclvar types do not match.

Contributor

mikedn commented Jul 14, 2018

Local assertion propagation needs to either block copy propagation in such cases or perform a re-morph.

Or local assertion propagation shouldn't even generate an assertion in this case, it already does not generate an assertion if lclvar types do not match.

@jakobbotsch

This comment has been minimized.

Show comment
Hide comment
@jakobbotsch

jakobbotsch Jul 16, 2018

Collaborator

Here is a repro for x64 Linux:

using System;

class C0
{
    public sbyte F2;
    public uint F7;
}

interface IRT { void WriteLine<T>(T v); }
class CRT : IRT
{
    public void WriteLine<T>(T v) => Console.WriteLine(v);
}

public class Program
{
    static C0 s_3 = new C0();
    static CRT s_rt;
    public static void Main()
    {
        s_rt = new CRT();
        var vr39 = s_3.F7;
        var vr40 = s_3.F7;
        var vr41 = (sbyte)0;
        var vr42 = s_3.F2 - 53;
        var vr43 = new ulong[, ][][]{{new ulong[][]{new ulong[]{0}}}};
        M3(vr39, vr40, vr41, true, 43, vr42, 0, 118, vr43);
    }

    static void M3(uint arg0, uint arg1, sbyte arg2, bool arg3, ushort arg4, int arg5, long arg7, ushort arg8, ulong[, ][][] arg10)
    {
        bool var1 = default(bool);
        if (arg3)
        {
            var1 |= true;
        }

        if (var1)
        {
        }
        else
        {
            arg8 = 0;
        }

        arg4 = arg8;
        ushort var17 = arg4;
        s_rt.WriteLine((int)var17); // passes garbage in top half
        arg10[0, 0][0][0] = arg10[0, 0][0][0];
        s_rt.WriteLine(arg0);
        s_rt.WriteLine(arg1);
        s_rt.WriteLine(arg2);
        s_rt.WriteLine(arg3);
        s_rt.WriteLine((int)arg8);
    }
}
Collaborator

jakobbotsch commented Jul 16, 2018

Here is a repro for x64 Linux:

using System;

class C0
{
    public sbyte F2;
    public uint F7;
}

interface IRT { void WriteLine<T>(T v); }
class CRT : IRT
{
    public void WriteLine<T>(T v) => Console.WriteLine(v);
}

public class Program
{
    static C0 s_3 = new C0();
    static CRT s_rt;
    public static void Main()
    {
        s_rt = new CRT();
        var vr39 = s_3.F7;
        var vr40 = s_3.F7;
        var vr41 = (sbyte)0;
        var vr42 = s_3.F2 - 53;
        var vr43 = new ulong[, ][][]{{new ulong[][]{new ulong[]{0}}}};
        M3(vr39, vr40, vr41, true, 43, vr42, 0, 118, vr43);
    }

    static void M3(uint arg0, uint arg1, sbyte arg2, bool arg3, ushort arg4, int arg5, long arg7, ushort arg8, ulong[, ][][] arg10)
    {
        bool var1 = default(bool);
        if (arg3)
        {
            var1 |= true;
        }

        if (var1)
        {
        }
        else
        {
            arg8 = 0;
        }

        arg4 = arg8;
        ushort var17 = arg4;
        s_rt.WriteLine((int)var17); // passes garbage in top half
        arg10[0, 0][0][0] = arg10[0, 0][0][0];
        s_rt.WriteLine(arg0);
        s_rt.WriteLine(arg1);
        s_rt.WriteLine(arg2);
        s_rt.WriteLine(arg3);
        s_rt.WriteLine((int)arg8);
    }
}
@jakobbotsch

This comment has been minimized.

Show comment
Hide comment
@jakobbotsch

jakobbotsch Aug 7, 2018

Collaborator

Here is a case for x64 that looks like the same problem:

// Debug: Outputs -1
// Release: Outputs 1
struct S0
{
    public uint F1;
    public long F2;
    public ulong F3;
    public ushort F5;
    public bool F7;
    public S0(uint f1): this()
    {
        F1 = f1;
    }
}

public class Program
{
    static S0[] s_7 = new S0[]{new S0(0)};
    public static void Main()
    {
        M9(0);
    }

    static ushort M9(short arg1)
    {
        long var0 = 0;
        try
        {
            System.GC.KeepAlive(var0);
        }
        finally
        {
            var vr12 = new ulong[]{0, 2271009908085114245UL};
            S0[] vr18 = new S0[]{new S0(32768)};
            uint vr19 = vr18[0].F1;
            arg1 = (short)vr19;
            arg1 %= -32767;
            System.GC.KeepAlive(s_7[0]);
            System.GC.KeepAlive(s_7[0]);
        }

        System.Console.WriteLine(arg1);
        return s_7[0].F5;
    }
}

The 'magic division' for the modulo in the finally shows that RyuJIT incorrectly stores arg1 as 2 bytes, and then uses it as if it was sign-extended to 4 bytes:

       480FBF4010           movsx    rax, word  ptr [rax+16]
       66894510             mov      word  ptr [rbp+10H], ax        ; 2 bytes stored
       B8FDFFFE7F           mov      eax, 0x7FFEFFFD
       F76D10               imul     edx:eax, dword ptr [rbp+10H]   ; 4 bytes used -- not sign extended
       8BCA                 mov      ecx, edx
       2B4D10               sub      ecx, dword ptr [rbp+10H]
       8BC1                 mov      eax, ecx
       C1E81F               shr      eax, 31
       C1F90E               sar      ecx, 14
       03C8                 add      ecx, eax
       69C90180FFFF         imul     ecx, ecx, -0x7FFF
       480FBF4510           movsx    rax, word  ptr [rbp+10H]
       2BC1                 sub      eax, ecx
       480FBFC8             movsx    rcx, ax

Not entirely sure why the try-finally seems to be necessary -- an x64 example did not show up until I implemented them.

@mikedn does this look like the same problem?

Collaborator

jakobbotsch commented Aug 7, 2018

Here is a case for x64 that looks like the same problem:

// Debug: Outputs -1
// Release: Outputs 1
struct S0
{
    public uint F1;
    public long F2;
    public ulong F3;
    public ushort F5;
    public bool F7;
    public S0(uint f1): this()
    {
        F1 = f1;
    }
}

public class Program
{
    static S0[] s_7 = new S0[]{new S0(0)};
    public static void Main()
    {
        M9(0);
    }

    static ushort M9(short arg1)
    {
        long var0 = 0;
        try
        {
            System.GC.KeepAlive(var0);
        }
        finally
        {
            var vr12 = new ulong[]{0, 2271009908085114245UL};
            S0[] vr18 = new S0[]{new S0(32768)};
            uint vr19 = vr18[0].F1;
            arg1 = (short)vr19;
            arg1 %= -32767;
            System.GC.KeepAlive(s_7[0]);
            System.GC.KeepAlive(s_7[0]);
        }

        System.Console.WriteLine(arg1);
        return s_7[0].F5;
    }
}

The 'magic division' for the modulo in the finally shows that RyuJIT incorrectly stores arg1 as 2 bytes, and then uses it as if it was sign-extended to 4 bytes:

       480FBF4010           movsx    rax, word  ptr [rax+16]
       66894510             mov      word  ptr [rbp+10H], ax        ; 2 bytes stored
       B8FDFFFE7F           mov      eax, 0x7FFEFFFD
       F76D10               imul     edx:eax, dword ptr [rbp+10H]   ; 4 bytes used -- not sign extended
       8BCA                 mov      ecx, edx
       2B4D10               sub      ecx, dword ptr [rbp+10H]
       8BC1                 mov      eax, ecx
       C1E81F               shr      eax, 31
       C1F90E               sar      ecx, 14
       03C8                 add      ecx, eax
       69C90180FFFF         imul     ecx, ecx, -0x7FFF
       480FBF4510           movsx    rax, word  ptr [rbp+10H]
       2BC1                 sub      eax, ecx
       480FBFC8             movsx    rcx, ax

Not entirely sure why the try-finally seems to be necessary -- an x64 example did not show up until I implemented them.

@mikedn does this look like the same problem?

@mikedn

This comment has been minimized.

Show comment
Hide comment
@mikedn

mikedn Aug 15, 2018

Contributor

Here is a case for x64 that looks like the same problem:

It's similar but not quite identical. In the original case a copy assertion is generated incorrectly and the use of a "normalize on store" variable is substituted with a "normalize on load" variable. The later requires a cast to be present in the IR and substitution does not insert such a cast.

In the latest example a subrange assertion is generated due to the RHS of arg1 = (short)vr19; being short. This makes morph think that the "normalize on load" cast is not needed so it does not insert it. From there things fail in various ways. But in many cases failures are not observable, at least because the result is assigned back to a short variable.

That said, I'm not 100% sure what the bug is:

  1. Is the subrange assertion wrong?
  2. Is morph wrong for using that assertion to decide "normalize on load" cast insertion?
  3. Is it wrong to store only 8/16 bits in a "normalize on load" variable?

"Normalize on load" exists only to deal with those (fortunately rare) cases where the stores are performed outside the method - method arguments and local variables passed by reference to other methods. In those cases the subrange assertion can't obviously ever be generated and morph has to insert the cast.

So the only way that morph's use of the subrange assertion makes sense is if stores to "normalize on load" variables are actually 32 bit stores, so that the state of the upper 24/16 bits is known. Otherwise morph must always insert the cast.

Contributor

mikedn commented Aug 15, 2018

Here is a case for x64 that looks like the same problem:

It's similar but not quite identical. In the original case a copy assertion is generated incorrectly and the use of a "normalize on store" variable is substituted with a "normalize on load" variable. The later requires a cast to be present in the IR and substitution does not insert such a cast.

In the latest example a subrange assertion is generated due to the RHS of arg1 = (short)vr19; being short. This makes morph think that the "normalize on load" cast is not needed so it does not insert it. From there things fail in various ways. But in many cases failures are not observable, at least because the result is assigned back to a short variable.

That said, I'm not 100% sure what the bug is:

  1. Is the subrange assertion wrong?
  2. Is morph wrong for using that assertion to decide "normalize on load" cast insertion?
  3. Is it wrong to store only 8/16 bits in a "normalize on load" variable?

"Normalize on load" exists only to deal with those (fortunately rare) cases where the stores are performed outside the method - method arguments and local variables passed by reference to other methods. In those cases the subrange assertion can't obviously ever be generated and morph has to insert the cast.

So the only way that morph's use of the subrange assertion makes sense is if stores to "normalize on load" variables are actually 32 bit stores, so that the state of the upper 24/16 bits is known. Otherwise morph must always insert the cast.

@jakobbotsch

This comment has been minimized.

Show comment
Hide comment
@jakobbotsch

jakobbotsch Aug 15, 2018

Collaborator

Thank you, that is very enlightening. I do have a much simpler example now:

// Debug: Outputs 0
// Release: Outputs 65536
public class Program
{
    static short s_19;
    public static void Main()
    {
        M75(-1);
    }

    static void M75(short arg0)
    {
        try
        {
            arg0 = -1;
        }
        finally
        {
            arg0 &= 1;
        }

        arg0 = s_19;
        short var11 = arg0;
        System.Console.WriteLine(0 - var11);
    }
}
G_M59072_IG01:
       55                   push     rbp
       4883EC10             sub      rsp, 16
       488D6C2410           lea      rbp, [rsp+10H]
       488965F0             mov      qword ptr [rbp-10H], rsp
       894D10               mov      dword ptr [rbp+10H], ecx

G_M59072_IG02:
       480FBF0D8934E8FF     movsx    rcx, word  ptr [reloc classVar[0x41ec5278]]
       66894D10             mov      word  ptr [rbp+10H], cx
       8B4D10               mov      ecx, dword ptr [rbp+10H]
       F7D9                 neg      ecx

I will open a new issue for it tomorrow some time.

Collaborator

jakobbotsch commented Aug 15, 2018

Thank you, that is very enlightening. I do have a much simpler example now:

// Debug: Outputs 0
// Release: Outputs 65536
public class Program
{
    static short s_19;
    public static void Main()
    {
        M75(-1);
    }

    static void M75(short arg0)
    {
        try
        {
            arg0 = -1;
        }
        finally
        {
            arg0 &= 1;
        }

        arg0 = s_19;
        short var11 = arg0;
        System.Console.WriteLine(0 - var11);
    }
}
G_M59072_IG01:
       55                   push     rbp
       4883EC10             sub      rsp, 16
       488D6C2410           lea      rbp, [rsp+10H]
       488965F0             mov      qword ptr [rbp-10H], rsp
       894D10               mov      dword ptr [rbp+10H], ecx

G_M59072_IG02:
       480FBF0D8934E8FF     movsx    rcx, word  ptr [reloc classVar[0x41ec5278]]
       66894D10             mov      word  ptr [rbp+10H], cx
       8B4D10               mov      ecx, dword ptr [rbp+10H]
       F7D9                 neg      ecx

I will open a new issue for it tomorrow some time.

@mikedn

This comment has been minimized.

Show comment
Hide comment
@mikedn

mikedn Aug 18, 2018

Contributor

Nice repro!

I've done a quick test - forcing morph to always insert a cast results in a small code size regression, around 40 bytes across all FX. It's reasonable and some of the regressions can be addressed by improvements in other parts of the JIT.

Contributor

mikedn commented Aug 18, 2018

Nice repro!

I've done a quick test - forcing morph to always insert a cast results in a small code size regression, around 40 bytes across all FX. It's reasonable and some of the regressions can be addressed by improvements in other parts of the JIT.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment