New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Suggestion: markdown output for DisassemblyDiagnoser #560

Closed
JosephTremoulet opened this Issue Sep 27, 2017 · 5 comments

Comments

Projects
None yet
3 participants
@JosephTremoulet

JosephTremoulet commented Sep 27, 2017

Same as with the overall report, it would be handy to have a markdown version generated alongside the html one, in case that happens to match the format where we want to report the output.

/cc @adamsitnik

@adamsitnik adamsitnik self-assigned this Sep 27, 2017

@adamsitnik

This comment has been minimized.

Show comment
Hide comment
@adamsitnik

adamsitnik Sep 27, 2017

Member

good idea, I am going to implement it

Member

adamsitnik commented Sep 27, 2017

good idea, I am going to implement it

@adamsitnik

This comment has been minimized.

Show comment
Hide comment
@adamsitnik

adamsitnik Dec 7, 2017

Member

I have some time to implement it. Finally ;)

@JosephTremoulet what would be the best option?

Single md file with code for all benchmarks?
Single md file per benchmark? Raw (raw output from ClrMD with addresses) or prettified (#546)?

Member

adamsitnik commented Dec 7, 2017

I have some time to implement it. Finally ;)

@JosephTremoulet what would be the best option?

Single md file with code for all benchmarks?
Single md file per benchmark? Raw (raw output from ClrMD with addresses) or prettified (#546)?

@JosephTremoulet

This comment has been minimized.

Show comment
Hide comment
@JosephTremoulet

JosephTremoulet Dec 7, 2017

I'd be inclined to use a single file with prettified code for all benchmarks, so that including this in PRs etc. would be as simple as a single copy/paste

JosephTremoulet commented Dec 7, 2017

I'd be inclined to use a single file with prettified code for all benchmarks, so that including this in PRs etc. would be as simple as a single copy/paste

@adamsitnik adamsitnik added this to the v0.10.12 milestone Dec 7, 2017

@AndreyAkinshin AndreyAkinshin modified the milestones: v0.10.12, v0.10.x Jan 11, 2018

@AndreyAkinshin AndreyAkinshin modified the milestones: v0.10.x, v0.11.0, v0.11.x Apr 9, 2018

@adamsitnik adamsitnik modified the milestones: v0.11.x, v0.11.2 Oct 14, 2018

@adamsitnik

This comment has been minimized.

Show comment
Hide comment
@adamsitnik

adamsitnik Oct 14, 2018

Member

The new look is coming!

// Sample benchmark class: finds the largest element of an int array by
// calling a separate, non-inlined Max helper for every element.
public class Something
{
    // The integers 0..999, created by the field initializer.
    private int[] numbers = Enumerable.Range(0, 1000).ToArray();
    
    // Benchmarked method: returns the largest value stored in `numbers`.
    [Benchmark]
    public int Method()
    {
        // Start from the smallest int so that any element compares greater or equal.
        int biggest = int.MinValue;

        for (int i = 0; i < numbers.Length; i++)
            biggest = Max(biggest, numbers[i]);

        return biggest;
    }

    // Returns the larger of the two arguments (right when they are equal).
    // NoInlining asks the JIT to keep this as a real call, so it shows up as a
    // separate method in the disassembly that follows.
    [MethodImpl(MethodImplOptions.NoInlining)]
    int Max(int left, int right) => left > right ? left : right;
}
; BenchmarkDotNet.Samples.Something.Method()
; Disassembly of the benchmarked loop: walks an array and keeps the result of
; Max(running result, element) in eax.
; NOTE(review): no prologue and no `ret` appear in this listing even though it
; ends with `add rsp,28h`, so the dump looks partial.
; Register roles as used below: eax = running result, edi = loop index,
; rsi = object whose qword at +8 is loaded as the array reference
; (presumably `this` and the `numbers` field -- confirm against the object layout).
       ; eax = 80000000h, the 32-bit pattern of int.MinValue
       mov     eax,80000000h
       ; edi = 0 (loop index)
       xor     edi,edi
       mov     rcx,qword ptr [rsi+8]
       ; skip the loop when the dword at [array+8] (presumably Length) is <= 0
       cmp     dword ptr [rcx+8],0
       jle     M00_L01
M00_L00:
       ; edx = running result; Max compares edx against r8d
       mov     edx,eax
       ; the array reference is re-read from the object on every iteration
       mov     rcx,qword ptr [rsi+8]
       ; unsigned compare of the index against [array+8], presumably the bounds
       ; check; the target is printed as a raw address, not an Mxx_Lyy label
       cmp     edi,dword ptr [rcx+8]
       jae     00007ffc`42ea2197
       movsxd  r8,edi
       ; r8d = 4-byte element at array + 10h + index*4
       mov     r8d,dword ptr [rcx+r8*4+10h]
       mov     rcx,rsi
       call    BenchmarkDotNet.Samples.Something.Max(Int32, Int32)
       ; eax now holds the call's result and is reused as the running result
       inc     edi
       mov     rdx,qword ptr [rsi+8]
       ; loop while [array+8] > index (signed compare)
       cmp     dword ptr [rdx+8],edi
       jg      M00_L00
M00_L01:
       add     rsp,28h
; Total bytes of code 59
; BenchmarkDotNet.Samples.Something.Max(Int32, Int32)
; Puts the larger of edx and r8d (signed compare) in eax; r8d wins on equality.
; NOTE(review): no `ret` is shown after M01_L00, so the listing appears to stop
; one instruction early.
       cmp     edx,r8d
       jg      M01_L00
       ; edx <= r8d: return r8d
       mov     eax,r8d
       ret
M01_L00:
       ; edx > r8d: result is edx
       mov     eax,edx
; Total bytes of code 11
Member

adamsitnik commented Oct 14, 2018

The new look is coming!

// Sample benchmark class: finds the largest element of an int array by
// calling a separate, non-inlined Max helper for every element.
public class Something
{
    // The integers 0..999, created by the field initializer.
    private int[] numbers = Enumerable.Range(0, 1000).ToArray();
    
    // Benchmarked method: returns the largest value stored in `numbers`.
    [Benchmark]
    public int Method()
    {
        // Start from the smallest int so that any element compares greater or equal.
        int biggest = int.MinValue;

        for (int i = 0; i < numbers.Length; i++)
            biggest = Max(biggest, numbers[i]);

        return biggest;
    }

    // Returns the larger of the two arguments (right when they are equal).
    // NoInlining asks the JIT to keep this as a real call, so it shows up as a
    // separate method in the disassembly that follows.
    [MethodImpl(MethodImplOptions.NoInlining)]
    int Max(int left, int right) => left > right ? left : right;
}
; BenchmarkDotNet.Samples.Something.Method()
; Disassembly of the benchmarked loop: walks an array and keeps the result of
; Max(running result, element) in eax.
; NOTE(review): no prologue and no `ret` appear in this listing even though it
; ends with `add rsp,28h`, so the dump looks partial.
; Register roles as used below: eax = running result, edi = loop index,
; rsi = object whose qword at +8 is loaded as the array reference
; (presumably `this` and the `numbers` field -- confirm against the object layout).
       ; eax = 80000000h, the 32-bit pattern of int.MinValue
       mov     eax,80000000h
       ; edi = 0 (loop index)
       xor     edi,edi
       mov     rcx,qword ptr [rsi+8]
       ; skip the loop when the dword at [array+8] (presumably Length) is <= 0
       cmp     dword ptr [rcx+8],0
       jle     M00_L01
M00_L00:
       ; edx = running result; Max compares edx against r8d
       mov     edx,eax
       ; the array reference is re-read from the object on every iteration
       mov     rcx,qword ptr [rsi+8]
       ; unsigned compare of the index against [array+8], presumably the bounds
       ; check; the target is printed as a raw address, not an Mxx_Lyy label
       cmp     edi,dword ptr [rcx+8]
       jae     00007ffc`42ea2197
       movsxd  r8,edi
       ; r8d = 4-byte element at array + 10h + index*4
       mov     r8d,dword ptr [rcx+r8*4+10h]
       mov     rcx,rsi
       call    BenchmarkDotNet.Samples.Something.Max(Int32, Int32)
       ; eax now holds the call's result and is reused as the running result
       inc     edi
       mov     rdx,qword ptr [rsi+8]
       ; loop while [array+8] > index (signed compare)
       cmp     dword ptr [rdx+8],edi
       jg      M00_L00
M00_L01:
       add     rsp,28h
; Total bytes of code 59
; BenchmarkDotNet.Samples.Something.Max(Int32, Int32)
; Puts the larger of edx and r8d (signed compare) in eax; r8d wins on equality.
; NOTE(review): no `ret` is shown after M01_L00, so the listing appears to stop
; one instruction early.
       cmp     edx,r8d
       jg      M01_L00
       ; edx <= r8d: return r8d
       mov     eax,r8d
       ret
M01_L00:
       ; edx > r8d: result is edx
       mov     eax,edx
; Total bytes of code 11
@adamsitnik

This comment has been minimized.

Show comment
Hide comment
@adamsitnik

adamsitnik Oct 16, 2018

Member

Output for some real-world benchmark (I love it):

; Microsoft.ML.CpuMath.PerformanceTests.ReproCase.Direct()
; Builds two (qword pointer, dword length) records on the stack from fields of
; the object in rcx and passes their addresses to AvxIntrinsics.DotU.
; NOTE(review): the records are presumably the two Span<Single> arguments named
; in the callee signature, and [rcx+8] / [rcx+10h] / [rcx+18h] presumably two
; array fields and a shared length -- confirm against the C# source.
       push    rdi
       push    rsi
       sub     rsp,48h
       vzeroupper
       ; keep rcx in rsi because `rep stos` below uses ecx as its count
       mov     rsi,rcx
       ; zero 8 dwords (32 bytes) starting at [rsp+28h]: both record slots
       lea     rdi,[rsp+28h]
       mov     ecx,8
       xor     eax,eax
       rep     stos dword ptr [rdi]
       mov     rcx,rsi
       ; rdx = first reference field; eax = r8d = 32-bit value at [rcx+18h]
       mov     rdx,qword ptr [rcx+8]
       mov     eax,dword ptr [rcx+18h]
       mov     r8d,eax
       test    rdx,rdx
       jne     M00_L00
       ; null reference: only a zero length is accepted, giving pointer 0 / length 0
       test    r8d,r8d
       jne     M00_L04
       xor     r9d,r9d
       xor     r8d,r8d
       jmp     M00_L01
M00_L00:
       ; `jb` after a compare with 0 can never be taken (nothing is unsigned-below 0)
       cmp     dword ptr [rdx+8],0
       jb      M00_L05
       ; throw when the dword at [rdx+8] is unsigned-smaller than the requested length
       cmp     dword ptr [rdx+8],r8d
       jb      M00_L05
       ; r9 = address 10h past the reference (presumably the first element)
       lea     r9,[rdx+10h]
M00_L01:
       ; same sequence for the second reference field, producing rdx (pointer) and eax (length)
       mov     rcx,qword ptr [rcx+10h]
       test    rcx,rcx
       jne     M00_L02
       test    eax,eax
       jne     M00_L06
       xor     edx,edx
       xor     eax,eax
       jmp     M00_L03
M00_L02:
       ; as above, the first `jb` can never be taken
       cmp     dword ptr [rcx+8],0
       jb      M00_L07
       cmp     dword ptr [rcx+8],eax
       jb      M00_L07
       lea     rdx,[rcx+10h]
M00_L03:
       ; record at [rsp+38h] = (r9, r8d)
       lea     rcx,[rsp+38h]
       mov     qword ptr [rcx],r9
       mov     dword ptr [rcx+8],r8d
       ; record at [rsp+28h] = (rdx, eax)
       lea     rcx,[rsp+28h]
       mov     qword ptr [rcx],rdx
       mov     dword ptr [rcx+8],eax
       ; pass the two record addresses in rcx and rdx
       lea     rcx,[rsp+38h]
       lea     rdx,[rsp+28h]
       call    Microsoft.ML.Runtime.Internal.CpuMath.AvxIntrinsics.DotU(System.Span`1<Single>, System.Span`1<Single>)
       nop
       add     rsp,48h
       pop     rsi
       pop     rdi
       ret
; M00_L04..M00_L07: one throw stub per failing check, each followed by int 3.
M00_L04:
       call    System.ThrowHelper.ThrowArgumentOutOfRangeException()
       int     3
M00_L05:
       call    System.ThrowHelper.ThrowArgumentOutOfRangeException()
       int     3
M00_L06:
       call    System.ThrowHelper.ThrowArgumentOutOfRangeException()
       int     3
M00_L07:
       call    System.ThrowHelper.ThrowArgumentOutOfRangeException()
       int     3
       ; NOTE(review): the lines below follow the last int 3 and include `???`
       ; (bytes the disassembler could not decode); presumably trailing non-code
       ; bytes swept into the listing rather than instructions of this method.
       add     byte ptr [rax],al
       add     byte ptr [rcx],bl
       ???
       add     eax,dword ptr [rax]
       ???
; Total bytes of code 185
; Microsoft.ML.Runtime.Internal.CpuMath.AvxIntrinsics.DotU(System.Span`1<Single>, System.Span`1<Single>)
; Accumulates products of 4-byte floats read through two pointers in three
; phases: 32 bytes per step (ymm loop), at most one 16-byte step (xmm), then
; 4 bytes per step (scalar loop). The partial sums are combined into xmm0.
; rcx and rdx each point to a record whose qword at +0 is used as a data pointer
; and whose dword at +8 is tested against zero (presumably the Span length).
       sub     rsp,18h
       vzeroupper
       ; zero the two 8-byte stack slots used below
       xor     eax,eax
       mov     qword ptr [rsp+10h],rax
       mov     qword ptr [rsp+8],rax
       ; eax = dword at [rcx+8]; first pointer is 0 when that is zero, else [rcx]
       mov     eax,dword ptr [rcx+8]
       test    eax,eax
       jne     M01_L00
       xor     r8d,r8d
       jmp     M01_L01
M01_L00:
       mov     r8,qword ptr [rcx]
M01_L01:
       mov     qword ptr [rsp+10h],r8
       ; second pointer, same rule, ends up in rcx
       cmp     dword ptr [rdx+8],0
       jne     M01_L02
       xor     ecx,ecx
       jmp     M01_L03
M01_L02:
       mov     rcx,qword ptr [rdx]
M01_L03:
       mov     qword ptr [rsp+8],rcx
       mov     rdx,qword ptr [rsp+10h]
       ; rax = rdx + count*4: end address of the first buffer
       movsxd  rax,eax
       lea     rax,[rdx+rax*4]
       ; ymm0 = 0 (256-bit accumulator)
       vxorps  ymm0,ymm0,ymm0
       ; enter the 32-byte loop only if a full 32 bytes fit before rax
       lea     r8,[rdx+20h]
       cmp     r8,rax
       ja      M01_L05
M01_L04:
       ; ymm0 += [rdx] * [rcx] (eight packed floats), then advance both pointers by 20h
       vmovups ymm1,ymmword ptr [rdx]
       vmovups ymm2,ymmword ptr [rcx]
       vmulps  ymm1,ymm1,ymm2
       vaddps  ymm0,ymm0,ymm1
       add     rdx,20h
       add     rcx,20h
       lea     r8,[rdx+20h]
       cmp     r8,rax
       jbe     M01_L04
M01_L05:
       ; two horizontal adds, then add the upper 128-bit half to the lower one:
       ; the 256-bit accumulator is reduced to a scalar in xmm0
       vhaddps ymm0,ymm0,ymm0
       vhaddps ymm0,ymm0,ymm0
       vmovaps ymm1,ymm0
       vextractf128 xmm0,ymm0,1
       vaddss  xmm0,xmm1,xmm0
       ; xmm1 = 0 (128-bit accumulator)
       vxorps  xmm1,xmm1,xmm1
       ; at most one 16-byte step (there is no branch back)
       lea     r8,[rdx+10h]
       cmp     r8,rax
       ja      M01_L06
       vmovups xmm2,xmmword ptr [rdx]
       vmovups xmm3,xmmword ptr [rcx]
       vmulps  xmm2,xmm2,xmm3
       vaddps  xmm1,xmm1,xmm2
       add     rdx,10h
       add     rcx,10h
M01_L06:
       ; two horizontal adds fold xmm1
       vhaddps xmm1,xmm1,xmm1
       vhaddps xmm1,xmm1,xmm1
       cmp     rdx,rax
       jae     M01_L08
M01_L07:
       ; scalar tail: xmm1 += [rdx] * [rcx], 4 bytes per iteration, until rdx reaches rax
       vmovss  xmm2,dword ptr [rdx]
       vmovss  xmm3,dword ptr [rcx]
       vmulss  xmm2,xmm2,xmm3
       vaddss  xmm1,xmm1,xmm2
       add     rdx,4
       add     rcx,4
       cmp     rdx,rax
       jb      M01_L07
M01_L08:
       ; xmm0 = xmm1 + xmm0: combine the partial sums
       vaddss  xmm0,xmm1,xmm0
       vzeroupper
       add     rsp,18h
       ret
       ; NOTE(review): the lines below come after the only `ret` and are not the
       ; target of any branch above; presumably trailing non-code bytes decoded
       ; as instructions rather than part of this method.
       add     byte ptr [rax],al
       add     byte ptr [rcx],bl
       add     al,1
       add     byte ptr [rdx],al
       add     byte ptr [rax],al
       add     byte ptr [rax],al
       add     al,bl
       xchg    eax,esp
       outs    dx,byte ptr [rsi]
       mov     eax,7FF9h
       nop     dword ptr [rax+rax]
; Total bytes of code 277
Member

adamsitnik commented Oct 16, 2018

Output for some real-world benchmark (I love it):

; Microsoft.ML.CpuMath.PerformanceTests.ReproCase.Direct()
; Builds two (qword pointer, dword length) records on the stack from fields of
; the object in rcx and passes their addresses to AvxIntrinsics.DotU.
; NOTE(review): the records are presumably the two Span<Single> arguments named
; in the callee signature, and [rcx+8] / [rcx+10h] / [rcx+18h] presumably two
; array fields and a shared length -- confirm against the C# source.
       push    rdi
       push    rsi
       sub     rsp,48h
       vzeroupper
       ; keep rcx in rsi because `rep stos` below uses ecx as its count
       mov     rsi,rcx
       ; zero 8 dwords (32 bytes) starting at [rsp+28h]: both record slots
       lea     rdi,[rsp+28h]
       mov     ecx,8
       xor     eax,eax
       rep     stos dword ptr [rdi]
       mov     rcx,rsi
       ; rdx = first reference field; eax = r8d = 32-bit value at [rcx+18h]
       mov     rdx,qword ptr [rcx+8]
       mov     eax,dword ptr [rcx+18h]
       mov     r8d,eax
       test    rdx,rdx
       jne     M00_L00
       ; null reference: only a zero length is accepted, giving pointer 0 / length 0
       test    r8d,r8d
       jne     M00_L04
       xor     r9d,r9d
       xor     r8d,r8d
       jmp     M00_L01
M00_L00:
       ; `jb` after a compare with 0 can never be taken (nothing is unsigned-below 0)
       cmp     dword ptr [rdx+8],0
       jb      M00_L05
       ; throw when the dword at [rdx+8] is unsigned-smaller than the requested length
       cmp     dword ptr [rdx+8],r8d
       jb      M00_L05
       ; r9 = address 10h past the reference (presumably the first element)
       lea     r9,[rdx+10h]
M00_L01:
       ; same sequence for the second reference field, producing rdx (pointer) and eax (length)
       mov     rcx,qword ptr [rcx+10h]
       test    rcx,rcx
       jne     M00_L02
       test    eax,eax
       jne     M00_L06
       xor     edx,edx
       xor     eax,eax
       jmp     M00_L03
M00_L02:
       ; as above, the first `jb` can never be taken
       cmp     dword ptr [rcx+8],0
       jb      M00_L07
       cmp     dword ptr [rcx+8],eax
       jb      M00_L07
       lea     rdx,[rcx+10h]
M00_L03:
       ; record at [rsp+38h] = (r9, r8d)
       lea     rcx,[rsp+38h]
       mov     qword ptr [rcx],r9
       mov     dword ptr [rcx+8],r8d
       ; record at [rsp+28h] = (rdx, eax)
       lea     rcx,[rsp+28h]
       mov     qword ptr [rcx],rdx
       mov     dword ptr [rcx+8],eax
       ; pass the two record addresses in rcx and rdx
       lea     rcx,[rsp+38h]
       lea     rdx,[rsp+28h]
       call    Microsoft.ML.Runtime.Internal.CpuMath.AvxIntrinsics.DotU(System.Span`1<Single>, System.Span`1<Single>)
       nop
       add     rsp,48h
       pop     rsi
       pop     rdi
       ret
; M00_L04..M00_L07: one throw stub per failing check, each followed by int 3.
M00_L04:
       call    System.ThrowHelper.ThrowArgumentOutOfRangeException()
       int     3
M00_L05:
       call    System.ThrowHelper.ThrowArgumentOutOfRangeException()
       int     3
M00_L06:
       call    System.ThrowHelper.ThrowArgumentOutOfRangeException()
       int     3
M00_L07:
       call    System.ThrowHelper.ThrowArgumentOutOfRangeException()
       int     3
       ; NOTE(review): the lines below follow the last int 3 and include `???`
       ; (bytes the disassembler could not decode); presumably trailing non-code
       ; bytes swept into the listing rather than instructions of this method.
       add     byte ptr [rax],al
       add     byte ptr [rcx],bl
       ???
       add     eax,dword ptr [rax]
       ???
; Total bytes of code 185
; Microsoft.ML.Runtime.Internal.CpuMath.AvxIntrinsics.DotU(System.Span`1<Single>, System.Span`1<Single>)
; Accumulates products of 4-byte floats read through two pointers in three
; phases: 32 bytes per step (ymm loop), at most one 16-byte step (xmm), then
; 4 bytes per step (scalar loop). The partial sums are combined into xmm0.
; rcx and rdx each point to a record whose qword at +0 is used as a data pointer
; and whose dword at +8 is tested against zero (presumably the Span length).
       sub     rsp,18h
       vzeroupper
       ; zero the two 8-byte stack slots used below
       xor     eax,eax
       mov     qword ptr [rsp+10h],rax
       mov     qword ptr [rsp+8],rax
       ; eax = dword at [rcx+8]; first pointer is 0 when that is zero, else [rcx]
       mov     eax,dword ptr [rcx+8]
       test    eax,eax
       jne     M01_L00
       xor     r8d,r8d
       jmp     M01_L01
M01_L00:
       mov     r8,qword ptr [rcx]
M01_L01:
       mov     qword ptr [rsp+10h],r8
       ; second pointer, same rule, ends up in rcx
       cmp     dword ptr [rdx+8],0
       jne     M01_L02
       xor     ecx,ecx
       jmp     M01_L03
M01_L02:
       mov     rcx,qword ptr [rdx]
M01_L03:
       mov     qword ptr [rsp+8],rcx
       mov     rdx,qword ptr [rsp+10h]
       ; rax = rdx + count*4: end address of the first buffer
       movsxd  rax,eax
       lea     rax,[rdx+rax*4]
       ; ymm0 = 0 (256-bit accumulator)
       vxorps  ymm0,ymm0,ymm0
       ; enter the 32-byte loop only if a full 32 bytes fit before rax
       lea     r8,[rdx+20h]
       cmp     r8,rax
       ja      M01_L05
M01_L04:
       ; ymm0 += [rdx] * [rcx] (eight packed floats), then advance both pointers by 20h
       vmovups ymm1,ymmword ptr [rdx]
       vmovups ymm2,ymmword ptr [rcx]
       vmulps  ymm1,ymm1,ymm2
       vaddps  ymm0,ymm0,ymm1
       add     rdx,20h
       add     rcx,20h
       lea     r8,[rdx+20h]
       cmp     r8,rax
       jbe     M01_L04
M01_L05:
       ; two horizontal adds, then add the upper 128-bit half to the lower one:
       ; the 256-bit accumulator is reduced to a scalar in xmm0
       vhaddps ymm0,ymm0,ymm0
       vhaddps ymm0,ymm0,ymm0
       vmovaps ymm1,ymm0
       vextractf128 xmm0,ymm0,1
       vaddss  xmm0,xmm1,xmm0
       ; xmm1 = 0 (128-bit accumulator)
       vxorps  xmm1,xmm1,xmm1
       ; at most one 16-byte step (there is no branch back)
       lea     r8,[rdx+10h]
       cmp     r8,rax
       ja      M01_L06
       vmovups xmm2,xmmword ptr [rdx]
       vmovups xmm3,xmmword ptr [rcx]
       vmulps  xmm2,xmm2,xmm3
       vaddps  xmm1,xmm1,xmm2
       add     rdx,10h
       add     rcx,10h
M01_L06:
       ; two horizontal adds fold xmm1
       vhaddps xmm1,xmm1,xmm1
       vhaddps xmm1,xmm1,xmm1
       cmp     rdx,rax
       jae     M01_L08
M01_L07:
       ; scalar tail: xmm1 += [rdx] * [rcx], 4 bytes per iteration, until rdx reaches rax
       vmovss  xmm2,dword ptr [rdx]
       vmovss  xmm3,dword ptr [rcx]
       vmulss  xmm2,xmm2,xmm3
       vaddss  xmm1,xmm1,xmm2
       add     rdx,4
       add     rcx,4
       cmp     rdx,rax
       jb      M01_L07
M01_L08:
       ; xmm0 = xmm1 + xmm0: combine the partial sums
       vaddss  xmm0,xmm1,xmm0
       vzeroupper
       add     rsp,18h
       ret
       ; NOTE(review): the lines below come after the only `ret` and are not the
       ; target of any branch above; presumably trailing non-code bytes decoded
       ; as instructions rather than part of this method.
       add     byte ptr [rax],al
       add     byte ptr [rcx],bl
       add     al,1
       add     byte ptr [rdx],al
       add     byte ptr [rax],al
       add     byte ptr [rax],al
       add     al,bl
       xchg    eax,esp
       outs    dx,byte ptr [rsi]
       mov     eax,7FF9h
       nop     dword ptr [rax+rax]
; Total bytes of code 277
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment