// T holds a 16-byte buffer that is consumed from the front: readers decode a
// little-endian value at offset 0 and then shift the remaining bytes down.
type T struct {
	buf [16]byte
}

// readU32 decodes the first four bytes of the buffer as a little-endian
// uint32, removes those four bytes from the front of the buffer, and returns
// the decoded value.
func (t *T) readU32() uint32 {
	v := binary.LittleEndian.Uint32(t.buf[:4])
	t.shiftBuf(4)
	return v
}

// shiftBuf discards the first n bytes of the buffer, moves the remaining
// bytes to the front, and zero-fills the vacated tail. The buffer is treated
// as a 128-bit little-endian integer (lo = bytes 0..7, hi = bytes 8..15) that
// is shifted right by 8*n bits.
//
// n == 0 leaves the buffer unchanged; n >= 16 clears it entirely.
func (t *T) shiftBuf(n uint) {
	// Everything is shifted out. Checking this first also keeps 8*n below
	// from wrapping around for very large n.
	if n >= uint(len(t.buf)) {
		t.buf = [16]byte{}
		return
	}

	lo := binary.LittleEndian.Uint64(t.buf[:8])
	hi := binary.LittleEndian.Uint64(t.buf[8:])

	if n >= 8 {
		// The whole low word is discarded; what is left of the high word
		// becomes the new low word.
		lo, hi = hi>>(8*(n-8)), 0
	} else {
		shift := 8 * n
		// Go defines a shift by >= the operand width as 0, so for shift == 0
		// the term hi<<64 contributes nothing and lo is left intact. Reducing
		// the count modulo 64 here would instead OR all of hi into lo.
		lo = lo>>shift | hi<<(64-shift)
		hi >>= shift
	}

	binary.LittleEndian.PutUint64(t.buf[:8], lo)
	binary.LittleEndian.PutUint64(t.buf[8:], hi)
}
Expected: a single MOVL for LittleEndian.Uint32, and a single MOVQ for each LittleEndian.Uint64 and LittleEndian.PutUint64 call, or something similar.
Actual: (*T).shiftBuf gets inlined. The Uint32 call then gets broken up into four byte loads. The first Uint64 becomes a MOVL to fetch the upper word, which is then combined with the Uint32 result. The final PutUint64 is broken up into two MOVLs. With gotip:
"".(*T).readU32 STEXT nosplit size=115 args=0x8 locals=0x0 funcid=0x0 align=0x0
0x0000 00000 (t.go:9) TEXT "".(*T).readU32(SB), NOSPLIT|ABIInternal, $0-8
0x0000 00000 (t.go:9) FUNCDATA $0, gclocals·c2071639b6d8d876272b6494fd4db694cb4563c618e97968c244b75ebb6010cc(SB)
0x0000 00000 (t.go:9) FUNCDATA $1, gclocals·27917eed0c3b3bbbded907160bb0e979825b780d6fad406e47efba824cbdf65b(SB)
0x0000 00000 (t.go:9) FUNCDATA $5, "".(*T).readU32.arginfo1(SB)
0x0000 00000 (t.go:9) FUNCDATA $6, "".(*T).readU32.argliveinfo(SB)
0x0000 00000 (t.go:9) PCDATA $3, $1
0x0000 00000 (t.go:10) XCHGL AX, AX
0x0001 00001 (<unknown line number>) NOP
0x0001 00001 (t.go:11) MOVBLZX (AX), CX
0x0004 00004 ($GOROOT/src/encoding/binary/binary.go:81) MOVBLZX 1(AX), DX
0x0008 00008 ($GOROOT/src/encoding/binary/binary.go:81) MOVBLZX 2(AX), BX
0x000c 00012 ($GOROOT/src/encoding/binary/binary.go:81) MOVBLZX 3(AX), SI
0x0010 00016 (t.go:16) XCHGL AX, AX
0x0011 00017 (t.go:17) XCHGL AX, AX
0x0012 00018 ($GOROOT/src/encoding/binary/binary.go:103) MOVL DX, DI
0x0014 00020 ($GOROOT/src/encoding/binary/binary.go:103) SHLQ $8, DX
0x0018 00024 ($GOROOT/src/encoding/binary/binary.go:103) ORQ CX, DX
0x001b 00027 ($GOROOT/src/encoding/binary/binary.go:103) MOVL BX, R8
0x001e 00030 ($GOROOT/src/encoding/binary/binary.go:103) SHLQ $16, BX
0x0022 00034 ($GOROOT/src/encoding/binary/binary.go:103) ORQ DX, BX
0x0025 00037 ($GOROOT/src/encoding/binary/binary.go:103) MOVL SI, DX
0x0027 00039 ($GOROOT/src/encoding/binary/binary.go:103) SHLQ $24, SI
0x002b 00043 ($GOROOT/src/encoding/binary/binary.go:103) ORQ BX, SI
0x002e 00046 ($GOROOT/src/encoding/binary/binary.go:104) MOVL 4(AX), BX
0x0031 00049 ($GOROOT/src/encoding/binary/binary.go:104) SHLQ $32, BX
0x0035 00053 ($GOROOT/src/encoding/binary/binary.go:104) ORQ BX, SI
0x0038 00056 (t.go:20) SHRQ $32, SI
0x003c 00060 ($GOROOT/src/encoding/binary/binary.go:104) MOVQ 8(AX), BX
0x0040 00064 (t.go:21) MOVQ BX, R9
0x0043 00067 (t.go:21) SHLQ $32, BX
0x0047 00071 (t.go:21) ORQ SI, BX
0x004a 00074 (t.go:24) XCHGL AX, AX
0x004b 00075 ($GOROOT/src/encoding/binary/binary.go:116) MOVQ BX, (AX)
0x004e 00078 (t.go:22) SHRQ $32, R9
0x0052 00082 (t.go:25) XCHGL AX, AX
0x0053 00083 ($GOROOT/src/encoding/binary/binary.go:112) MOVL R9, 8(AX)
0x0057 00087 ($GOROOT/src/encoding/binary/binary.go:116) MOVL $0, 12(AX)
0x005e 00094 ($GOROOT/src/encoding/binary/binary.go:81) SHLL $8, DI
0x0061 00097 ($GOROOT/src/encoding/binary/binary.go:81) ORL DI, CX
0x0063 00099 ($GOROOT/src/encoding/binary/binary.go:81) SHLL $16, R8
0x0067 00103 ($GOROOT/src/encoding/binary/binary.go:81) ORL CX, R8
0x006a 00106 ($GOROOT/src/encoding/binary/binary.go:81) SHLL $24, DX
0x006d 00109 ($GOROOT/src/encoding/binary/binary.go:81) ORL R8, DX
0x0070 00112 (t.go:12) MOVL DX, AX
0x0072 00114 (t.go:12) RET
GOARCH=arm64 produces byte loads and shifts too.
What version of Go are you using (`go version`)?
What did you do?
What did you expect to see?
A single MOVL for LittleEndian.Uint32, and a single MOVQ for each LittleEndian.(Put)?Uint64, or something similar.
What did you see instead?
(*T).shiftBuf gets inlined. The Uint32 call then gets broken up into four byte loads. The first Uint64 becomes a MOVL to fetch the upper word, which is then combined with the Uint32 result. The final PutUint64 is broken up into two MOVLs. With gotip:
GOARCH=arm64 produces byte loads and shifts too.