New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cmd/compile: reduce binary bloat for slice literals of interfaces #29573

Open
dsnet opened this Issue Jan 4, 2019 · 6 comments

Comments

Projects
None yet
3 participants
@dsnet
Copy link
Member

dsnet commented Jan 4, 2019

Using go1.12

Consider the following snippet:

func makeLiteral() []interface{} {
	return []interface{}{
		(uint32)(0),
		(tar.Format)(0),
		(*tar.Header)(nil),
		(*tar.Reader)(nil),
		(*tar.Writer)(nil),
		(*zip.File)(nil),
		(*zip.FileHeader)(nil),
		(*zip.ReadCloser)(nil),
		(*zip.Reader)(nil),
		(*zip.Writer)(nil),
	}
}
"".makeLiteral STEXT size=688 args=0x18 locals=0x18
	0x0000 00000 (main.go:10)	TEXT	"".makeLiteral(SB), $24-24
	0x0000 00000 (main.go:10)	MOVQ	(TLS), CX
	0x0009 00009 (main.go:10)	CMPQ	SP, 16(CX)
	0x000d 00013 (main.go:10)	JLS	678
	0x0013 00019 (main.go:10)	SUBQ	$24, SP
	0x0017 00023 (main.go:10)	MOVQ	BP, 16(SP)
	0x001c 00028 (main.go:10)	LEAQ	16(SP), BP
	0x0021 00033 (main.go:11)	LEAQ	type.[10]interface {}(SB), AX
	0x0028 00040 (main.go:11)	MOVQ	AX, (SP)
	0x002c 00044 (main.go:11)	CALL	runtime.newobject(SB)
	0x0031 00049 (main.go:11)	MOVQ	8(SP), AX
	0x0036 00054 (main.go:12)	LEAQ	type.uint32(SB), CX
	0x003d 00061 (main.go:12)	MOVQ	CX, (AX)
	0x0040 00064 (main.go:12)	CMPL	runtime.writeBarrier(SB), $0
	0x0047 00071 (main.go:12)	JNE	651
	0x004d 00077 (main.go:12)	LEAQ	"".statictmp_0(SB), CX
	0x0054 00084 (main.go:12)	MOVQ	CX, 8(AX)
	0x0058 00088 (main.go:13)	LEAQ	type.archive/tar.Format(SB), CX
	0x005f 00095 (main.go:13)	MOVQ	CX, 16(AX)
	0x0063 00099 (main.go:13)	CMPL	runtime.writeBarrier(SB), $0
	0x006a 00106 (main.go:13)	JNE	624
	0x0070 00112 (main.go:13)	LEAQ	"".statictmp_1(SB), CX
	0x0077 00119 (main.go:13)	MOVQ	CX, 24(AX)
	0x007b 00123 (main.go:14)	MOVQ	"".statictmp_2(SB), CX
	0x0082 00130 (main.go:14)	LEAQ	type.*archive/tar.Header(SB), DX
	0x0089 00137 (main.go:14)	MOVQ	DX, 32(AX)
	0x008d 00141 (main.go:14)	CMPL	runtime.writeBarrier(SB), $0
	0x0094 00148 (main.go:14)	JNE	601
	0x009a 00154 (main.go:14)	MOVQ	CX, 40(AX)
	0x009e 00158 (main.go:15)	MOVQ	"".statictmp_3(SB), CX
	0x00a5 00165 (main.go:15)	LEAQ	type.*archive/tar.Reader(SB), DX
	0x00ac 00172 (main.go:15)	MOVQ	DX, 48(AX)
	0x00b0 00176 (main.go:15)	CMPL	runtime.writeBarrier(SB), $0
	0x00b7 00183 (main.go:15)	JNE	578
	0x00bd 00189 (main.go:15)	MOVQ	CX, 56(AX)
	0x00c1 00193 (main.go:16)	MOVQ	"".statictmp_4(SB), CX
	0x00c8 00200 (main.go:16)	LEAQ	type.*archive/tar.Writer(SB), DX
	0x00cf 00207 (main.go:16)	MOVQ	DX, 64(AX)
	0x00d3 00211 (main.go:16)	CMPL	runtime.writeBarrier(SB), $0
	0x00da 00218 (main.go:16)	JNE	555
	0x00e0 00224 (main.go:16)	MOVQ	CX, 72(AX)
	0x00e4 00228 (main.go:17)	MOVQ	"".statictmp_5(SB), CX
	0x00eb 00235 (main.go:17)	LEAQ	type.*archive/zip.File(SB), DX
	0x00f2 00242 (main.go:17)	MOVQ	DX, 80(AX)
	0x00f6 00246 (main.go:17)	CMPL	runtime.writeBarrier(SB), $0
	0x00fd 00253 (main.go:17)	JNE	532
	0x0103 00259 (main.go:17)	MOVQ	CX, 88(AX)
	0x0107 00263 (main.go:18)	MOVQ	"".statictmp_6(SB), CX
	0x010e 00270 (main.go:18)	LEAQ	type.*archive/zip.FileHeader(SB), DX
	0x0115 00277 (main.go:18)	MOVQ	DX, 96(AX)
	0x0119 00281 (main.go:18)	CMPL	runtime.writeBarrier(SB), $0
	0x0120 00288 (main.go:18)	JNE	509
	0x0126 00294 (main.go:18)	MOVQ	CX, 104(AX)
	0x012a 00298 (main.go:19)	MOVQ	"".statictmp_7(SB), CX
	0x0131 00305 (main.go:19)	LEAQ	type.*archive/zip.ReadCloser(SB), DX
	0x0138 00312 (main.go:19)	MOVQ	DX, 112(AX)
	0x013c 00316 (main.go:19)	CMPL	runtime.writeBarrier(SB), $0
	0x0143 00323 (main.go:19)	JNE	486
	0x0149 00329 (main.go:19)	MOVQ	CX, 120(AX)
	0x014d 00333 (main.go:20)	MOVQ	"".statictmp_8(SB), CX
	0x0154 00340 (main.go:20)	LEAQ	type.*archive/zip.Reader(SB), DX
	0x015b 00347 (main.go:20)	MOVQ	DX, 128(AX)
	0x0162 00354 (main.go:20)	CMPL	runtime.writeBarrier(SB), $0
	0x0169 00361 (main.go:20)	JNE	463
	0x016b 00363 (main.go:20)	MOVQ	CX, 136(AX)
	0x0172 00370 (main.go:21)	MOVQ	"".statictmp_9(SB), CX
	0x0179 00377 (main.go:21)	LEAQ	type.*archive/zip.Writer(SB), DX
	0x0180 00384 (main.go:21)	MOVQ	DX, 144(AX)
	0x0187 00391 (main.go:21)	CMPL	runtime.writeBarrier(SB), $0
	0x018e 00398 (main.go:21)	JNE	440
	0x0190 00400 (main.go:21)	MOVQ	CX, 152(AX)
	0x0197 00407 (main.go:11)	MOVQ	AX, "".~r0+32(SP)
	0x019c 00412 (main.go:11)	MOVQ	$10, "".~r0+40(SP)
	0x01a5 00421 (main.go:11)	MOVQ	$10, "".~r0+48(SP)
	0x01ae 00430 (main.go:11)	MOVQ	16(SP), BP
	0x01b3 00435 (main.go:11)	ADDQ	$24, SP
	0x01b7 00439 (main.go:11)	RET
	0x01b8 00440 (main.go:21)	LEAQ	152(AX), DI
	0x01bf 00447 (main.go:11)	MOVQ	AX, DX
	0x01c2 00450 (main.go:21)	MOVQ	CX, AX
	0x01c5 00453 (main.go:21)	CALL	runtime.gcWriteBarrier(SB)
	0x01ca 00458 (main.go:11)	MOVQ	DX, AX
	0x01cd 00461 (main.go:21)	JMP	407
	0x01cf 00463 (main.go:20)	LEAQ	136(AX), DI
	0x01d6 00470 (main.go:11)	MOVQ	AX, DX
	0x01d9 00473 (main.go:20)	MOVQ	CX, AX
	0x01dc 00476 (main.go:20)	CALL	runtime.gcWriteBarrier(SB)
	0x01e1 00481 (main.go:21)	MOVQ	DX, AX
	0x01e4 00484 (main.go:20)	JMP	370
	0x01e6 00486 (main.go:19)	LEAQ	120(AX), DI
	0x01ea 00490 (main.go:11)	MOVQ	AX, DX
	0x01ed 00493 (main.go:19)	MOVQ	CX, AX
	0x01f0 00496 (main.go:19)	CALL	runtime.gcWriteBarrier(SB)
	0x01f5 00501 (main.go:20)	MOVQ	DX, AX
	0x01f8 00504 (main.go:19)	JMP	333
	0x01fd 00509 (main.go:18)	LEAQ	104(AX), DI
	0x0201 00513 (main.go:11)	MOVQ	AX, DX
	0x0204 00516 (main.go:18)	MOVQ	CX, AX
	0x0207 00519 (main.go:18)	CALL	runtime.gcWriteBarrier(SB)
	0x020c 00524 (main.go:19)	MOVQ	DX, AX
	0x020f 00527 (main.go:18)	JMP	298
	0x0214 00532 (main.go:17)	LEAQ	88(AX), DI
	0x0218 00536 (main.go:11)	MOVQ	AX, DX
	0x021b 00539 (main.go:17)	MOVQ	CX, AX
	0x021e 00542 (main.go:17)	CALL	runtime.gcWriteBarrier(SB)
	0x0223 00547 (main.go:18)	MOVQ	DX, AX
	0x0226 00550 (main.go:17)	JMP	263
	0x022b 00555 (main.go:16)	LEAQ	72(AX), DI
	0x022f 00559 (main.go:11)	MOVQ	AX, DX
	0x0232 00562 (main.go:16)	MOVQ	CX, AX
	0x0235 00565 (main.go:16)	CALL	runtime.gcWriteBarrier(SB)
	0x023a 00570 (main.go:17)	MOVQ	DX, AX
	0x023d 00573 (main.go:16)	JMP	228
	0x0242 00578 (main.go:15)	LEAQ	56(AX), DI
	0x0246 00582 (main.go:11)	MOVQ	AX, DX
	0x0249 00585 (main.go:15)	MOVQ	CX, AX
	0x024c 00588 (main.go:15)	CALL	runtime.gcWriteBarrier(SB)
	0x0251 00593 (main.go:16)	MOVQ	DX, AX
	0x0254 00596 (main.go:15)	JMP	193
	0x0259 00601 (main.go:14)	LEAQ	40(AX), DI
	0x025d 00605 (main.go:11)	MOVQ	AX, DX
	0x0260 00608 (main.go:14)	MOVQ	CX, AX
	0x0263 00611 (main.go:14)	CALL	runtime.gcWriteBarrier(SB)
	0x0268 00616 (main.go:15)	MOVQ	DX, AX
	0x026b 00619 (main.go:14)	JMP	158
	0x0270 00624 (main.go:13)	LEAQ	24(AX), DI
	0x0274 00628 (main.go:11)	MOVQ	AX, CX
	0x0277 00631 (main.go:13)	LEAQ	"".statictmp_1(SB), AX
	0x027e 00638 (main.go:13)	CALL	runtime.gcWriteBarrier(SB)
	0x0283 00643 (main.go:14)	MOVQ	CX, AX
	0x0286 00646 (main.go:13)	JMP	123
	0x028b 00651 (main.go:12)	LEAQ	8(AX), DI
	0x028f 00655 (main.go:11)	MOVQ	AX, CX
	0x0292 00658 (main.go:12)	LEAQ	"".statictmp_0(SB), AX
	0x0299 00665 (main.go:12)	CALL	runtime.gcWriteBarrier(SB)
	0x029e 00670 (main.go:13)	MOVQ	CX, AX
	0x02a1 00673 (main.go:12)	JMP	88
	0x02a6 00678 (main.go:12)	NOP
	0x02a6 00678 (main.go:10)	CALL	runtime.morestack_noctxt(SB)
	0x02ab 00683 (main.go:10)	JMP	0

In my program I have many of these slice literals, some with thousands of entries. The generated code is ~60B/entry, when I expect that this could theoretically be closer to 8B/entry for the *rtype pointer.

\cc @randall77

@dsnet dsnet added the Performance label Jan 4, 2019

@randall77

This comment has been minimized.

Copy link
Contributor

randall77 commented Jan 4, 2019

This is a case where removing the write barrier could help a lot.

To remove the write barrier we need to know 2 things:

  1. The location being written to doesn't have a heap pointer in it.
  2. The pointer being written is not a pointer to a heap object.

The latter is easy, the addresses of globals and nil pointers are not heap pointers.

The former is harder. We are writing to a newly allocated object, so it starts out zeroed, but in order to be sure each write is to a zeroed location we need to understand that all the writes are to distinct parts of the newly allocated object.

This is the kind of thing that https://go-review.googlesource.com/c/go/+/151498 was supposed to help with, but it can't quite prove it's safe to remove the write barrier in this case.

It shouldn't be hard to get this right for the stylized initializations that are done for slice literals and friends. It just needs a bit more analysis in the write barrier insertion pass.

@randall77 randall77 added this to the Go1.13 milestone Jan 4, 2019

@randall77

This comment has been minimized.

Copy link
Contributor

randall77 commented Jan 4, 2019

Another possibility to solve this issue is to make it so we initialize a statictmp with the desired contents and do just one typedmemmove to initialize the whole backing store.
It looks like the statictmp builder doesn't realize that some concrete->interface conversions can be done at compile time. That we could fix. We already do it for globals, like:

var y int
var x interface{} = &y
@randall77

This comment has been minimized.

Copy link
Contributor

randall77 commented Jan 4, 2019

Note that this does work currently if the slice literal is at global scope:

var x = []interface{}{
	(uint32)(0),
	(tar.Format)(0),
	(*tar.Header)(nil),
	(*tar.Reader)(nil),
	(*tar.Writer)(nil),
	(*zip.File)(nil),
	(*zip.FileHeader)(nil),
	(*zip.ReadCloser)(nil),
	(*zip.Reader)(nil),
	(*zip.Writer)(nil),
}
@randall77

This comment has been minimized.

Copy link
Contributor

randall77 commented Jan 4, 2019

Generates just:

"".x SDATA size=24
	0x0000 00 00 00 00 00 00 00 00 0a 00 00 00 00 00 00 00  ................
	0x0010 0a 00 00 00 00 00 00 00                          ........
	rel 0+8 t=1 "".statictmp_0+0
"".statictmp_0 SDATA size=160
	0x0000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
	0x0010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
	0x0020 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
	0x0030 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
	0x0040 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
	0x0050 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
	0x0060 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
	0x0070 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
	0x0080 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
	0x0090 00 00 00 00 00 00 00 00                          ........
	rel 0+8 t=1 type.uint32+0
	rel 8+8 t=1 "".statictmp_1+0
	rel 16+8 t=1 type.archive/tar.Format+0
	rel 24+8 t=1 "".statictmp_2+0
	rel 32+8 t=1 type.*archive/tar.Header+0
	rel 48+8 t=1 type.*archive/tar.Reader+0
	rel 64+8 t=1 type.*archive/tar.Writer+0
	rel 80+8 t=1 type.*archive/zip.File+0
	rel 96+8 t=1 type.*archive/zip.FileHeader+0
	rel 112+8 t=1 type.*archive/zip.ReadCloser+0
	rel 128+8 t=1 type.*archive/zip.Reader+0
	rel 144+8 t=1 type.*archive/zip.Writer+0
"".statictmp_1 SNOPTRBSS size=4
"".statictmp_2 SNOPTRBSS size=8
@gopherbot

This comment has been minimized.

Copy link

gopherbot commented Jan 5, 2019

Change https://golang.org/cl/156363 mentions this issue: cmd/compile: better write barrier removal when initializing new objects

@gopherbot

This comment has been minimized.

Copy link

gopherbot commented Jan 14, 2019

Change https://golang.org/cl/157799 mentions this issue: runtime: keep FuncForPC from crashing for PCs between functions

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment