Commit a762221
runtime: rearrange mheap_.alloc* into allocSpan
This change combines the functionality of allocSpanLocked, allocManual, and alloc_m into a new method called allocSpan. While these methods' abstraction boundaries are OK when the heap lock is held throughout, they start to break down when we want finer-grained locking in the page allocator.

allocSpan does just that, and only locks the heap when it absolutely has to. Piggy-backing off of work in previous CLs to make more of span initialization lockless, this change makes span initialization entirely lockless as part of the reorganization.

Ultimately this change will enable us to add a lockless fast path to allocSpan.

Updates #35112.

Change-Id: I99875939d75fb4e958a67ac99e4a7cda44f06864
Reviewed-on: https://go-review.googlesource.com/c/go/+/196641
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
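For readers skimming the commit message before the diff, here is a minimal, hypothetical Go sketch of the locking shape described above: hold the lock only while touching shared allocator state, initialize the span-like object with the lock released, and publish it last with an atomic store. The names (pagePool, span) and the whole program are invented for illustration and are not the runtime's code; the real implementation is in the diff below.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// span is a stand-in for a heap span: a base address, a length in pages,
// and a flag that is set only once the span is fully initialized.
type span struct {
	base, npages uintptr
	elemsize     uintptr
	inUse        atomic.Bool
}

// pagePool is a stand-in for the page allocator's shared state.
type pagePool struct {
	mu   sync.Mutex
	next uintptr // next free page index
}

// allocSpan mirrors the shape of the change: lock only around the shared
// page-allocator state, initialize the span with the lock released, and
// publish it last via an atomic store.
func (p *pagePool) allocSpan(npages, elemsize uintptr) *span {
	p.mu.Lock()
	base := p.next
	p.next += npages
	p.mu.Unlock()

	// Span initialization happens entirely without the lock.
	s := &span{base: base, npages: npages, elemsize: elemsize}

	// Publish: anyone who observes inUse == true also observes the
	// field writes above.
	s.inUse.Store(true)
	return s
}

func main() {
	p := &pagePool{}
	s := p.allocSpan(4, 64)
	fmt.Println(s.base, s.npages, s.elemsize, s.inUse.Load())
}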
1 parent a5a6f61 commit a762221

File tree: 1 file changed, +165 -160 lines

src/runtime/mheap.go

+165 -160
@@ -42,7 +42,7 @@ type mheap struct {
 	// could self-deadlock if its stack grows with the lock held.
 	lock      mutex
 	pages     pageAlloc // page allocation data structure
-	sweepgen  uint32    // sweep generation, see comment in mspan
+	sweepgen  uint32    // sweep generation, see comment in mspan; written during STW
 	sweepdone uint32    // all spans are swept
 	sweepers  uint32    // number of active sweepone calls

@@ -848,136 +848,23 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
 	return nFreed
 }
 
-// alloc_m is the internal implementation of mheap.alloc.
-//
-// alloc_m must run on the system stack because it locks the heap, so
-// any stack growth during alloc_m would self-deadlock.
-//
-//go:systemstack
-func (h *mheap) alloc_m(npage uintptr, spanclass spanClass) *mspan {
-	_g_ := getg()
-
-	// To prevent excessive heap growth, before allocating n pages
-	// we need to sweep and reclaim at least n pages.
-	if h.sweepdone == 0 {
-		h.reclaim(npage)
-	}
-
-	// Compute size information.
-	nbytes := npage << _PageShift
-	var elemSize, nelems uintptr
-	if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
-		elemSize = nbytes
-		nelems = 1
-	} else {
-		elemSize = uintptr(class_to_size[sizeclass])
-		nelems = nbytes / elemSize
-	}
-
-	// Allocate mark and allocation bits before we take the heap
-	// lock. We'll drop these on the floor if we fail to allocate
-	// the span, but in that case we'll panic soon.
-	gcmarkBits := newMarkBits(nelems)
-	allocBits := newAllocBits(nelems)
-
-	lock(&h.lock)
-	// transfer stats from cache to global
-	memstats.heap_scan += uint64(_g_.m.mcache.local_scan)
-	_g_.m.mcache.local_scan = 0
-	memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs)
-	_g_.m.mcache.local_tinyallocs = 0
-
-	s := h.allocSpanLocked(npage, &memstats.heap_inuse)
-	if s != nil {
-		// Record span info, because gc needs to be
-		// able to map interior pointer to containing span.
-		atomic.Store(&s.sweepgen, h.sweepgen)
-		h.sweepSpans[h.sweepgen/2%2].push(s) // Add to swept in-use list.
-		s.allocCount = 0
-		s.spanclass = spanclass
-		s.elemsize = elemSize
-		if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
-			s.divShift = 0
-			s.divMul = 0
-			s.divShift2 = 0
-			s.baseMask = 0
-
-			// Update additional stats.
-			mheap_.largealloc += uint64(s.elemsize)
-			mheap_.nlargealloc++
-			atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift))
-		} else {
-			m := &class_to_divmagic[sizeclass]
-			s.divShift = m.shift
-			s.divMul = m.mul
-			s.divShift2 = m.shift2
-			s.baseMask = m.baseMask
-		}
-
-		// Initialize mark and allocation structures.
-		s.freeindex = 0
-		s.allocCache = ^uint64(0) // all 1s indicating all free.
-		s.nelems = nelems
-		s.gcmarkBits = gcmarkBits
-		s.allocBits = allocBits
-
-		// Now that the span is filled in, set its state. This
-		// is a publication barrier for the other fields in
-		// the span. While valid pointers into this span
-		// should never be visible until the span is returned,
-		// if the garbage collector finds an invalid pointer,
-		// access to the span may race with initialization of
-		// the span. We resolve this race by atomically
-		// setting the state after the span is fully
-		// initialized, and atomically checking the state in
-		// any situation where a pointer is suspect.
-		s.state.set(mSpanInUse)
-
-		// Mark in-use span in arena page bitmap.
-		//
-		// This publishes the span to the page sweeper, so
-		// it's imperative that the span be completely initialized
-		// prior to this line.
-		arena, pageIdx, pageMask := pageIndexOf(s.base())
-		atomic.Or8(&arena.pageInUse[pageIdx], pageMask)
-
-		// Update related page sweeper stats.
-		atomic.Xadd64(&h.pagesInUse, int64(npage))
-	}
-	// heap_scan and heap_live were updated.
-	if gcBlackenEnabled != 0 {
-		gcController.revise()
-	}
-
-	if trace.enabled {
-		traceHeapAlloc()
-	}
-
-	// h.spans is accessed concurrently without synchronization
-	// from other threads. Hence, there must be a store/store
-	// barrier here to ensure the writes to h.spans above happen
-	// before the caller can publish a pointer p to an object
-	// allocated from s. As soon as this happens, the garbage
-	// collector running on another processor could read p and
-	// look up s in h.spans. The unlock acts as the barrier to
-	// order these writes. On the read side, the data dependency
-	// between p and the index in h.spans orders the reads.
-	unlock(&h.lock)
-	return s
-}
-
 // alloc allocates a new span of npage pages from the GC'd heap.
 //
 // spanclass indicates the span's size class and scannability.
 //
 // If needzero is true, the memory for the returned span will be zeroed.
-func (h *mheap) alloc(npage uintptr, spanclass spanClass, needzero bool) *mspan {
+func (h *mheap) alloc(npages uintptr, spanclass spanClass, needzero bool) *mspan {
 	// Don't do any operations that lock the heap on the G stack.
 	// It might trigger stack growth, and the stack growth code needs
 	// to be able to allocate heap.
 	var s *mspan
 	systemstack(func() {
-		s = h.alloc_m(npage, spanclass)
+		// To prevent excessive heap growth, before allocating n pages
+		// we need to sweep and reclaim at least n pages.
+		if h.sweepdone == 0 {
+			h.reclaim(npages)
+		}
+		s = h.allocSpan(npages, false, spanclass, &memstats.heap_inuse)
 	})
 
 	if s != nil {
@@ -999,29 +886,12 @@ func (h *mheap) alloc(npage uintptr, spanclass spanClass, needzero bool) *mspan
 // The memory backing the returned span may not be zeroed if
 // span.needzero is set.
 //
-// allocManual must be called on the system stack because it acquires
-// the heap lock. See mheap for details.
+// allocManual must be called on the system stack because it may
+// acquire the heap lock via allocSpan. See mheap for details.
 //
 //go:systemstack
-func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan {
-	lock(&h.lock)
-	s := h.allocSpanLocked(npage, stat)
-	if s != nil {
-		s.manualFreeList = 0
-		s.allocCount = 0
-		s.spanclass = 0
-		s.nelems = 0
-		s.elemsize = 0
-		s.limit = s.base() + s.npages<<_PageShift
-		s.state.set(mSpanManual) // Publish the span
-		// Manually managed memory doesn't count toward heap_sys.
-		mSysStatDec(&memstats.heap_sys, s.npages*pageSize)
-	}
-
-	// This unlock acts as a release barrier. See mheap.alloc_m.
-	unlock(&h.lock)
-
-	return s
+func (h *mheap) allocManual(npages uintptr, stat *uint64) *mspan {
+	return h.allocSpan(npages, true, 0, stat)
 }
 
 // setSpans modifies the span map so [spanOf(base), spanOf(base+npage*pageSize))
@@ -1103,43 +973,178 @@ func (h *mheap) allocNeedsZero(base, npage uintptr) (needZero bool) {
 	return
 }
 
-// Allocates a span of the given size. h must be locked.
-// The returned span has been removed from the
-// free structures, but its state is still mSpanFree.
-func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan {
-	base, scav := h.pages.alloc(npage)
+// allocSpan allocates an mspan which owns npages worth of memory.
+//
+// If manual == false, allocSpan allocates a heap span of class spanclass
+// and updates heap accounting. If manual == true, allocSpan allocates a
+// manually-managed span (spanclass is ignored), and the caller is
+// responsible for any accounting related to its use of the span. Either
+// way, allocSpan will atomically add the bytes in the newly allocated
+// span to *sysStat.
+//
+// The returned span is fully initialized.
+//
+// h must not be locked.
+//
+// allocSpan must be called on the system stack both because it acquires
+// the heap lock and because it must block GC transitions.
+//
+//go:systemstack
+func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysStat *uint64) (s *mspan) {
+	// Function-global state.
+	gp := getg()
+	base, scav := uintptr(0), uintptr(0)
+
+	// We failed to do what we need to do without the lock.
+	lock(&h.lock)
+
+	// Try to acquire a base address.
+	base, scav = h.pages.alloc(npages)
 	if base != 0 {
 		goto HaveBase
 	}
-	if !h.grow(npage) {
+	if !h.grow(npages) {
+		unlock(&h.lock)
 		return nil
 	}
-	base, scav = h.pages.alloc(npage)
+	base, scav = h.pages.alloc(npages)
 	if base != 0 {
 		goto HaveBase
 	}
 	throw("grew heap, but no adequate free space found")
 
 HaveBase:
-	if scav != 0 {
-		// sysUsed all the pages that are actually available
-		// in the span.
-		sysUsed(unsafe.Pointer(base), npage*pageSize)
-		mSysStatDec(&memstats.heap_released, scav)
+	if !manual {
+		// This is a heap span, so we should do some additional accounting
+		// which may only be done with the heap locked.
+
+		// Transfer stats from mcache to global.
+		memstats.heap_scan += uint64(gp.m.mcache.local_scan)
+		gp.m.mcache.local_scan = 0
+		memstats.tinyallocs += uint64(gp.m.mcache.local_tinyallocs)
+		gp.m.mcache.local_tinyallocs = 0
+
+		// Do some additional accounting if it's a large allocation.
+		if spanclass.sizeclass() == 0 {
+			mheap_.largealloc += uint64(npages * pageSize)
+			mheap_.nlargealloc++
+			atomic.Xadd64(&memstats.heap_live, int64(npages*pageSize))
+		}
+
+		// Either heap_live or heap_scan could have been updated.
+		if gcBlackenEnabled != 0 {
+			gcController.revise()
+		}
 	}
 
-	s := (*mspan)(h.spanalloc.alloc())
-	s.init(base, npage)
-	if h.allocNeedsZero(base, npage) {
+	// Allocate an mspan object before releasing the lock.
+	s = (*mspan)(h.spanalloc.alloc())
+	unlock(&h.lock)
+
+	// Initialize the span.
+	s.init(base, npages)
+	if h.allocNeedsZero(base, npages) {
 		s.needzero = 1
 	}
-	h.setSpans(s.base(), npage, s)
+	nbytes := npages * pageSize
+	if manual {
+		s.manualFreeList = 0
+		s.nelems = 0
+		s.limit = s.base() + s.npages*pageSize
+		// Manually managed memory doesn't count toward heap_sys.
+		mSysStatDec(&memstats.heap_sys, s.npages*pageSize)
+		s.state.set(mSpanManual)
+	} else {
+		// We must set span properties before the span is published anywhere
+		// since we're not holding the heap lock.
+		s.spanclass = spanclass
+		if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
+			s.elemsize = nbytes
+			s.nelems = 1
+
+			s.divShift = 0
+			s.divMul = 0
+			s.divShift2 = 0
+			s.baseMask = 0
+		} else {
+			s.elemsize = uintptr(class_to_size[sizeclass])
+			s.nelems = nbytes / s.elemsize
+
+			m := &class_to_divmagic[sizeclass]
+			s.divShift = m.shift
+			s.divMul = m.mul
+			s.divShift2 = m.shift2
+			s.baseMask = m.baseMask
+		}
 
+		// Initialize mark and allocation structures.
+		s.freeindex = 0
+		s.allocCache = ^uint64(0) // all 1s indicating all free.
+		s.gcmarkBits = newMarkBits(s.nelems)
+		s.allocBits = newAllocBits(s.nelems)
+
+		// It's safe to access h.sweepgen without the heap lock because it's
+		// only ever updated with the world stopped and we run on the
+		// systemstack which blocks a STW transition.
+		atomic.Store(&s.sweepgen, h.sweepgen)
+
+		// Now that the span is filled in, set its state. This
+		// is a publication barrier for the other fields in
+		// the span. While valid pointers into this span
+		// should never be visible until the span is returned,
+		// if the garbage collector finds an invalid pointer,
+		// access to the span may race with initialization of
+		// the span. We resolve this race by atomically
+		// setting the state after the span is fully
+		// initialized, and atomically checking the state in
+		// any situation where a pointer is suspect.
+		s.state.set(mSpanInUse)
+	}
+
+	// Commit and account for any scavenged memory that the span now owns.
+	if scav != 0 {
+		// sysUsed all the pages that are actually available
+		// in the span since some of them might be scavenged.
+		sysUsed(unsafe.Pointer(base), nbytes)
+		mSysStatDec(&memstats.heap_released, scav)
+	}
 	// Update stats.
-	nbytes := npage * pageSize
-	mSysStatInc(stat, nbytes)
+	mSysStatInc(sysStat, nbytes)
 	mSysStatDec(&memstats.heap_idle, nbytes)
 
+	// Publish the span in various locations.
+
+	// This is safe to call without the lock held because the slots
+	// related to this span will only every be read or modified by
+	// this thread until pointers into the span are published or
+	// pageInUse is updated.
+	h.setSpans(s.base(), npages, s)
+
+	if !manual {
+		// Add to swept in-use list.
+		//
+		// This publishes the span to root marking.
+		//
+		// h.sweepgen is guaranteed to only change during STW,
+		// and preemption is disabled in the page allocator.
+		h.sweepSpans[h.sweepgen/2%2].push(s)
+
+		// Mark in-use span in arena page bitmap.
+		//
+		// This publishes the span to the page sweeper, so
+		// it's imperative that the span be completely initialized
+		// prior to this line.
+		arena, pageIdx, pageMask := pageIndexOf(s.base())
+		atomic.Or8(&arena.pageInUse[pageIdx], pageMask)
+
+		// Update related page sweeper stats.
+		atomic.Xadd64(&h.pagesInUse, int64(npages))
+
+		if trace.enabled {
+			// Trace that a heap alloc occurred.
+			traceHeapAlloc()
+		}
+	}
 	return s
 }
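The comment block added in allocSpan above describes how a race between span initialization and a garbage-collector reader holding a suspect pointer is resolved: the state is set atomically only after the span is fully initialized, and readers check the state atomically before trusting any other field. Below is a small, hypothetical sketch of that pattern using sync/atomic in ordinary Go; the record type and helper names are invented and this is not runtime code.

package main

import (
	"fmt"
	"sync/atomic"
)

const (
	stateFree  uint32 = 0
	stateInUse uint32 = 1
)

// record is a stand-in for a span: its state is written last by the
// initializer and checked first by any reader that cannot yet trust
// its pointer.
type record struct {
	state    atomic.Uint32
	elemsize uintptr
	nelems   uintptr
}

// publish fully initializes r and only then sets its state. The atomic
// store is the publication step: a reader that observes stateInUse also
// observes the field writes that preceded the store.
func publish(r *record, elemsize, nelems uintptr) {
	r.elemsize = elemsize
	r.nelems = nelems
	r.state.Store(stateInUse)
}

// inspect models a reader holding a suspect pointer: it checks the state
// atomically and reads the other fields only if the record is in use.
func inspect(r *record) (uintptr, bool) {
	if r.state.Load() != stateInUse {
		return 0, false
	}
	return r.elemsize * r.nelems, true
}

func main() {
	r := &record{}
	fmt.Println(inspect(r)) // 0 false: not published yet
	publish(r, 64, 128)
	fmt.Println(inspect(r)) // 8192 true
}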
