// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Memory statistics

package runtime

import (
	"runtime/internal/atomic"
	"unsafe"
)

// Statistics.
//
// For detailed descriptions see the documentation for MemStats.
// Fields that differ from MemStats are further documented here.
//
// Many of these fields are updated on the fly, while others are only
// updated when updatememstats is called.
type mstats struct {
	// General statistics.
	alloc       uint64 // bytes allocated and not yet freed
	total_alloc uint64 // bytes allocated (even if freed)
	sys         uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
	nlookup     uint64 // number of pointer lookups (unused)
	nmalloc     uint64 // number of mallocs
	nfree       uint64 // number of frees

	// Statistics about malloc heap.
	// Updated atomically, or with the world stopped.
	//
	// Like MemStats, heap_sys and heap_inuse do not count memory
	// in manually-managed spans.
	heap_sys      sysMemStat // virtual address space obtained from system for GC'd heap
	heap_inuse    uint64     // bytes in mSpanInUse spans
	heap_released uint64     // bytes released to the os

	// heap_objects is not used by the runtime directly and instead
	// computed on the fly by updatememstats.
	heap_objects uint64 // total number of allocated objects

	// Statistics about stacks.
	stacks_inuse uint64     // bytes in manually-managed stack spans; computed by updatememstats
	stacks_sys   sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys

	// Statistics about allocation of low-level fixed-size structures.
	// Protected by FixAlloc locks.
	mspan_inuse  uint64 // mspan structures
	mspan_sys    sysMemStat
	mcache_inuse uint64 // mcache structures
	mcache_sys   sysMemStat
	buckhash_sys sysMemStat // profiling bucket hash table

	// Statistics about GC overhead.
	gcWorkBufInUse           uint64     // computed by updatememstats
	gcProgPtrScalarBitsInUse uint64     // computed by updatememstats
	gcMiscSys                sysMemStat // updated atomically or during STW

	// Miscellaneous statistics.
	other_sys sysMemStat // updated atomically or during STW

	// Statistics about the garbage collector.

	// next_gc is the goal heap_live for when next GC ends.
	// Set to ^uint64(0) if disabled.
	//
	// Read and written atomically, unless the world is stopped.
	next_gc uint64

	// Protected by mheap or stopping the world during GC.
	last_gc_unix    uint64 // last gc (in unix time)
	pause_total_ns  uint64
	pause_ns        [256]uint64 // circular buffer of recent gc pause lengths
	pause_end       [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
	numgc           uint32
	numforcedgc     uint32  // number of user-forced GCs
	gc_cpu_fraction float64 // fraction of CPU time used by GC
	enablegc        bool
	debuggc         bool

	// Statistics about allocation size classes.

	by_size [_NumSizeClasses]struct {
		size    uint32
		nmalloc uint64
		nfree   uint64
	}

	// Add an uint32 for even number of size classes to align below fields
	// to 64 bits for atomic operations on 32 bit platforms.
	_ [1 - _NumSizeClasses%2]uint32

	last_gc_nanotime uint64 // last gc (monotonic time)
	tinyallocs       uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly

	last_next_gc    uint64 // next_gc for the previous GC
	last_heap_inuse uint64 // heap_inuse at mark termination of the previous GC

	// triggerRatio is the heap growth ratio that triggers marking.
	//
	// E.g., if this is 0.6, then GC should start when the live
	// heap has reached 1.6 times the heap size marked by the
	// previous cycle. This should be ≤ GOGC/100 so the trigger
	// heap size is less than the goal heap size. This is set
	// during mark termination for the next cycle's trigger.
	triggerRatio float64

	// gc_trigger is the heap size that triggers marking.
	//
	// When heap_live ≥ gc_trigger, the mark phase will start.
	// This is also the heap size by which proportional sweeping
	// must be complete.
	//
	// This is computed from triggerRatio during mark termination
	// for the next cycle's trigger.
	gc_trigger uint64

	// heap_live is the number of bytes considered live by the GC.
	// That is: retained by the most recent GC plus allocated
	// since then. heap_live <= alloc, since alloc includes unmarked
	// objects that have not yet been swept (and hence goes up as we
	// allocate and down as we sweep) while heap_live excludes these
	// objects (and hence only goes up between GCs).
	//
	// This is updated atomically without locking. To reduce
	// contention, this is updated only when obtaining a span from
	// an mcentral and at this point it counts all of the
	// unallocated slots in that span (which will be allocated
	// before that mcache obtains another span from that
	// mcentral). Hence, it slightly overestimates the "true" live
	// heap size. It's better to overestimate than to
	// underestimate because 1) this triggers the GC earlier than
	// necessary rather than potentially too late and 2) this
	// leads to a conservative GC rate rather than a GC rate that
	// is potentially too low.
	//
	// Reads should likewise be atomic (or during STW).
	//
	// Whenever this is updated, call traceHeapAlloc() and
	// gcController.revise().
	heap_live uint64

	// heap_scan is the number of bytes of "scannable" heap. This
	// is the live heap (as counted by heap_live), but omitting
	// no-scan objects and no-scan tails of objects.
	//
	// Whenever this is updated, call gcController.revise().
	//
	// Read and written atomically or with the world stopped.
	heap_scan uint64

	// heap_marked is the number of bytes marked by the previous
	// GC. After mark termination, heap_live == heap_marked, but
	// unlike heap_live, heap_marked does not change until the
	// next mark termination.
	heap_marked uint64

	// heapStats is a set of statistics that are updated consistently.
	heapStats consistentHeapStats

	// _ uint32 // ensure gcPauseDist is aligned

	// gcPauseDist represents the distribution of all GC-related
	// application pauses in the runtime.
	//
	// Each individual pause is counted separately, unlike pause_ns.
	gcPauseDist timeHistogram
}
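
// A rough sketch of how the trigger fields above relate (illustrative only;
// the real check lives in the GC trigger logic elsewhere in the runtime):
//
//	if atomic.Load64(&memstats.heap_live) >= memstats.gc_trigger {
//		// start the mark phase for this cycle
//	}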

var memstats mstats

// A MemStats records statistics about the memory allocator.
type MemStats struct {
	// General statistics.

	// Alloc is bytes of allocated heap objects.
	//
	// This is the same as HeapAlloc (see below).
	Alloc uint64

	// TotalAlloc is cumulative bytes allocated for heap objects.
	//
	// TotalAlloc increases as heap objects are allocated, but
	// unlike Alloc and HeapAlloc, it does not decrease when
	// objects are freed.
	TotalAlloc uint64

	// Sys is the total bytes of memory obtained from the OS.
	//
	// Sys is the sum of the XSys fields below. Sys measures the
	// virtual address space reserved by the Go runtime for the
	// heap, stacks, and other internal data structures. It's
	// likely that not all of the virtual address space is backed
	// by physical memory at any given moment, though in general
	// it all was at some point.
	Sys uint64

	// Lookups is the number of pointer lookups performed by the
	// runtime.
	//
	// This is primarily useful for debugging runtime internals.
	Lookups uint64

	// Mallocs is the cumulative count of heap objects allocated.
	// The number of live objects is Mallocs - Frees.
	Mallocs uint64

	// Frees is the cumulative count of heap objects freed.
	Frees uint64

	// Heap memory statistics.
	//
	// Interpreting the heap statistics requires some knowledge of
	// how Go organizes memory. Go divides the virtual address
	// space of the heap into "spans", which are contiguous
	// regions of memory 8K or larger. A span may be in one of
	// three states:
	//
	// An "idle" span contains no objects or other data. The
	// physical memory backing an idle span can be released back
	// to the OS (but the virtual address space never is), or it
	// can be converted into an "in use" or "stack" span.
	//
	// An "in use" span contains at least one heap object and may
	// have free space available to allocate more heap objects.
	//
	// A "stack" span is used for goroutine stacks. Stack spans
	// are not considered part of the heap. A span can change
	// between heap and stack memory; it is never used for both
	// simultaneously.

	// HeapAlloc is bytes of allocated heap objects.
	//
	// "Allocated" heap objects include all reachable objects, as
	// well as unreachable objects that the garbage collector has
	// not yet freed. Specifically, HeapAlloc increases as heap
	// objects are allocated and decreases as the heap is swept
	// and unreachable objects are freed. Sweeping occurs
	// incrementally between GC cycles, so these two processes
	// occur simultaneously, and as a result HeapAlloc tends to
	// change smoothly (in contrast with the sawtooth that is
	// typical of stop-the-world garbage collectors).
	HeapAlloc uint64

	// HeapSys is bytes of heap memory obtained from the OS.
	//
	// HeapSys measures the amount of virtual address space
	// reserved for the heap. This includes virtual address space
	// that has been reserved but not yet used, which consumes no
	// physical memory, but tends to be small, as well as virtual
	// address space for which the physical memory has been
	// returned to the OS after it became unused (see HeapReleased
	// for a measure of the latter).
	//
	// HeapSys estimates the largest size the heap has had.
	HeapSys uint64

	// HeapIdle is bytes in idle (unused) spans.
	//
	// Idle spans have no objects in them. These spans could be
	// (and may already have been) returned to the OS, or they can
	// be reused for heap allocations, or they can be reused as
	// stack memory.
	//
	// HeapIdle minus HeapReleased estimates the amount of memory
	// that could be returned to the OS, but is being retained by
	// the runtime so it can grow the heap without requesting more
	// memory from the OS. If this difference is significantly
	// larger than the heap size, it indicates there was a recent
	// transient spike in live heap size.
	HeapIdle uint64

	// HeapInuse is bytes in in-use spans.
	//
	// In-use spans have at least one object in them. These spans
	// can only be used for other objects of roughly the same
	// size.
	//
	// HeapInuse minus HeapAlloc estimates the amount of memory
	// that has been dedicated to particular size classes, but is
	// not currently being used. This is an upper bound on
	// fragmentation, but in general this memory can be reused
	// efficiently.
	HeapInuse uint64

	// HeapReleased is bytes of physical memory returned to the OS.
	//
	// This counts heap memory from idle spans that was returned
	// to the OS and has not yet been reacquired for the heap.
	HeapReleased uint64

	// HeapObjects is the number of allocated heap objects.
	//
	// Like HeapAlloc, this increases as objects are allocated and
	// decreases as the heap is swept and unreachable objects are
	// freed.
	HeapObjects uint64

	// Stack memory statistics.
	//
	// Stacks are not considered part of the heap, but the runtime
	// can reuse a span of heap memory for stack memory, and
	// vice-versa.

	// StackInuse is bytes in stack spans.
	//
	// In-use stack spans have at least one stack in them. These
	// spans can only be used for other stacks of the same size.
	//
	// There is no StackIdle because unused stack spans are
	// returned to the heap (and hence counted toward HeapIdle).
	StackInuse uint64

	// StackSys is bytes of stack memory obtained from the OS.
	//
	// StackSys is StackInuse, plus any memory obtained directly
	// from the OS for OS thread stacks (which should be minimal).
	StackSys uint64

	// Off-heap memory statistics.
	//
	// The following statistics measure runtime-internal
	// structures that are not allocated from heap memory (usually
	// because they are part of implementing the heap). Unlike
	// heap or stack memory, any memory allocated to these
	// structures is dedicated to these structures.
	//
	// These are primarily useful for debugging runtime memory
	// overheads.

	// MSpanInuse is bytes of allocated mspan structures.
	MSpanInuse uint64

	// MSpanSys is bytes of memory obtained from the OS for mspan
	// structures.
	MSpanSys uint64

	// MCacheInuse is bytes of allocated mcache structures.
	MCacheInuse uint64

	// MCacheSys is bytes of memory obtained from the OS for
	// mcache structures.
	MCacheSys uint64

	// BuckHashSys is bytes of memory in profiling bucket hash tables.
	BuckHashSys uint64

	// GCSys is bytes of memory in garbage collection metadata.
	GCSys uint64

	// OtherSys is bytes of memory in miscellaneous off-heap
	// runtime allocations.
	OtherSys uint64

	// Garbage collector statistics.

	// NextGC is the target heap size of the next GC cycle.
	//
	// The garbage collector's goal is to keep HeapAlloc ≤ NextGC.
	// At the end of each GC cycle, the target for the next cycle
	// is computed based on the amount of reachable data and the
	// value of GOGC.
	NextGC uint64

	// LastGC is the time the last garbage collection finished, as
	// nanoseconds since 1970 (the UNIX epoch).
	LastGC uint64

	// PauseTotalNs is the cumulative nanoseconds in GC
	// stop-the-world pauses since the program started.
	//
	// During a stop-the-world pause, all goroutines are paused
	// and only the garbage collector can run.
	PauseTotalNs uint64

	// PauseNs is a circular buffer of recent GC stop-the-world
	// pause times in nanoseconds.
	//
	// The most recent pause is at PauseNs[(NumGC+255)%256]. In
	// general, PauseNs[N%256] records the time paused in the most
	// recent N%256th GC cycle. There may be multiple pauses per
	// GC cycle; this is the sum of all pauses during a cycle.
	PauseNs [256]uint64

	// PauseEnd is a circular buffer of recent GC pause end times,
	// as nanoseconds since 1970 (the UNIX epoch).
	//
	// This buffer is filled the same way as PauseNs. There may be
	// multiple pauses per GC cycle; this records the end of the
	// last pause in a cycle.
	PauseEnd [256]uint64

	// NumGC is the number of completed GC cycles.
	NumGC uint32

	// NumForcedGC is the number of GC cycles that were forced by
	// the application calling the GC function.
	NumForcedGC uint32

	// GCCPUFraction is the fraction of this program's available
	// CPU time used by the GC since the program started.
	//
	// GCCPUFraction is expressed as a number between 0 and 1,
	// where 0 means GC has consumed none of this program's CPU. A
	// program's available CPU time is defined as the integral of
	// GOMAXPROCS since the program started. That is, if
	// GOMAXPROCS is 2 and a program has been running for 10
	// seconds, its "available CPU" is 20 seconds. GCCPUFraction
	// does not include CPU time used for write barrier activity.
	//
	// This is the same as the fraction of CPU reported by
	// GODEBUG=gctrace=1.
	GCCPUFraction float64

	// EnableGC indicates that GC is enabled. It is always true,
	// even if GOGC=off.
	EnableGC bool

	// DebugGC is currently unused.
	DebugGC bool

	// BySize reports per-size class allocation statistics.
	//
	// BySize[N] gives statistics for allocations of size S where
	// BySize[N-1].Size < S ≤ BySize[N].Size.
	//
	// This does not report allocations larger than BySize[60].Size.
	BySize [61]struct {
		// Size is the maximum byte size of an object in this
		// size class.
		Size uint32

		// Mallocs is the cumulative count of heap objects
		// allocated in this size class. The cumulative bytes
		// of allocation is Size*Mallocs. The number of live
		// objects in this size class is Mallocs - Frees.
		Mallocs uint64

		// Frees is the cumulative count of heap objects freed
		// in this size class.
		Frees uint64
	}
}
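
// Illustrative use of the exported fields above from user code (a hedged
// sketch, not part of the runtime itself); the relationships shown come
// straight from the field documentation:
//
//	var m runtime.MemStats
//	runtime.ReadMemStats(&m)
//	liveObjects := m.Mallocs - m.Frees        // per the Mallocs doc above
//	lastPause := m.PauseNs[(m.NumGC+255)%256] // per the PauseNs doc above
//	retainable := m.HeapIdle - m.HeapReleased // memory that could still be returned to the OS
//	_, _, _ = liveObjects, lastPause, retainable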

func init() {
	if offset := unsafe.Offsetof(memstats.heap_live); offset%8 != 0 {
		println(offset)
		throw("memstats.heap_live not aligned to 8 bytes")
	}
	if offset := unsafe.Offsetof(memstats.heapStats); offset%8 != 0 {
		println(offset)
		throw("memstats.heapStats not aligned to 8 bytes")
	}
	if offset := unsafe.Offsetof(memstats.gcPauseDist); offset%8 != 0 {
		println(offset)
		throw("memstats.gcPauseDist not aligned to 8 bytes")
	}
	// Ensure the size of heapStatsDelta causes adjacent fields/slots (e.g.
	// [3]heapStatsDelta) to be 8-byte aligned.
	if size := unsafe.Sizeof(heapStatsDelta{}); size%8 != 0 {
		println(size)
		throw("heapStatsDelta not a multiple of 8 bytes in size")
	}
}

// ReadMemStats populates m with memory allocator statistics.
//
// The returned memory allocator statistics are up to date as of the
// call to ReadMemStats. This is in contrast with a heap profile,
// which is a snapshot as of the most recently completed garbage
// collection cycle.
func ReadMemStats(m *MemStats) {
	stopTheWorld("read mem stats")

	systemstack(func() {
		readmemstats_m(m)
	})

	startTheWorld()
}
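
// Note that ReadMemStats stops the world for the duration of the call (see
// stopTheWorld above), so programs that sample it should do so at a modest
// rate. A hedged sketch of periodic sampling from user code (the 10-second
// interval is only an assumed example):
//
//	var m runtime.MemStats
//	for range time.Tick(10 * time.Second) {
//		runtime.ReadMemStats(&m)
//		println("heap alloc:", m.HeapAlloc, "next GC:", m.NextGC)
//	}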

func readmemstats_m(stats *MemStats) {
	updatememstats()

	stats.Alloc = memstats.alloc
	stats.TotalAlloc = memstats.total_alloc
	stats.Sys = memstats.sys
	stats.Mallocs = memstats.nmalloc
	stats.Frees = memstats.nfree
	stats.HeapAlloc = memstats.alloc
	stats.HeapSys = memstats.heap_sys.load()
	// By definition, HeapIdle is memory that was mapped
	// for the heap but is not currently used to hold heap
	// objects. It also specifically is memory that can be
	// used for other purposes, like stacks, but this memory
	// is subtracted out of HeapSys before it makes that
	// transition. Put another way:
	//
	// heap_sys = bytes allocated from the OS for the heap - bytes ultimately used for non-heap purposes
	// heap_idle = bytes allocated from the OS for the heap - bytes ultimately used for any purpose
	//
	// or
	//
	// heap_sys = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse
	// heap_idle = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse - heap_inuse
	//
	// => heap_idle = heap_sys - heap_inuse
	stats.HeapIdle = memstats.heap_sys.load() - memstats.heap_inuse
	stats.HeapInuse = memstats.heap_inuse
	stats.HeapReleased = memstats.heap_released
	stats.HeapObjects = memstats.heap_objects
	stats.StackInuse = memstats.stacks_inuse
	// memstats.stacks_sys is only memory mapped directly for OS stacks.
	// Add in heap-allocated stack memory for user consumption.
	stats.StackSys = memstats.stacks_inuse + memstats.stacks_sys.load()
	stats.MSpanInuse = memstats.mspan_inuse
	stats.MSpanSys = memstats.mspan_sys.load()
	stats.MCacheInuse = memstats.mcache_inuse
	stats.MCacheSys = memstats.mcache_sys.load()
	stats.BuckHashSys = memstats.buckhash_sys.load()
	// MemStats defines GCSys as an aggregate of all memory related
	// to the memory management system, but we track this memory
	// at a more granular level in the runtime.
	stats.GCSys = memstats.gcMiscSys.load() + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse
	stats.OtherSys = memstats.other_sys.load()
	stats.NextGC = memstats.next_gc
	stats.LastGC = memstats.last_gc_unix
	stats.PauseTotalNs = memstats.pause_total_ns
	stats.PauseNs = memstats.pause_ns
	stats.PauseEnd = memstats.pause_end
	stats.NumGC = memstats.numgc
	stats.NumForcedGC = memstats.numforcedgc
	stats.GCCPUFraction = memstats.gc_cpu_fraction
	stats.EnableGC = true

	// Handle BySize. Copy N values, where N is
	// the minimum of the lengths of the two arrays.
	// Unfortunately copy() won't work here because
	// the arrays have different structs.
	//
	// TODO(mknyszek): Consider renaming the fields
	// of by_size's elements to align so we can use
	// the copy built-in.
	bySizeLen := len(stats.BySize)
	if l := len(memstats.by_size); l < bySizeLen {
		bySizeLen = l
	}
	for i := 0; i < bySizeLen; i++ {
		stats.BySize[i].Size = memstats.by_size[i].size
		stats.BySize[i].Mallocs = memstats.by_size[i].nmalloc
		stats.BySize[i].Frees = memstats.by_size[i].nfree
	}
}

//go:linkname readGCStats runtime/debug.readGCStats
func readGCStats(pauses *[]uint64) {
	systemstack(func() {
		readGCStats_m(pauses)
	})
}

// readGCStats_m must be called on the system stack because it acquires the heap
// lock. See mheap for details.
//go:systemstack
func readGCStats_m(pauses *[]uint64) {
	p := *pauses
	// Calling code in runtime/debug should make the slice large enough.
	if cap(p) < len(memstats.pause_ns)+3 {
		throw("short slice passed to readGCStats")
	}

	// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
	lock(&mheap_.lock)

	n := memstats.numgc
	if n > uint32(len(memstats.pause_ns)) {
		n = uint32(len(memstats.pause_ns))
	}

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%len(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	p = p[:cap(p)]
	for i := uint32(0); i < n; i++ {
		j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
		p[i] = memstats.pause_ns[j]
		p[n+i] = memstats.pause_end[j]
	}

	p[n+n] = memstats.last_gc_unix
	p[n+n+1] = uint64(memstats.numgc)
	p[n+n+2] = memstats.pause_total_ns
	unlock(&mheap_.lock)

	*pauses = p[:n+n+3]
}
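
// readGCStats above is reached from the runtime/debug package via its
// linkname. A hedged sketch of the user-facing entry point that consumes it
// (illustrative, not part of the runtime):
//
//	var s debug.GCStats
//	debug.ReadGCStats(&s)
//	println(s.NumGC, s.PauseTotal.Nanoseconds())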

// Updates the memstats structure.
//
// The world must be stopped.
//
//go:nowritebarrier
func updatememstats() {
	assertWorldStopped()

	// Flush mcaches to mcentral before doing anything else.
	//
	// Flushing to the mcentral may in general cause stats to
	// change as mcentral data structures are manipulated.
	systemstack(flushallmcaches)

	memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
	memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
	memstats.sys = memstats.heap_sys.load() + memstats.stacks_sys.load() + memstats.mspan_sys.load() +
		memstats.mcache_sys.load() + memstats.buckhash_sys.load() + memstats.gcMiscSys.load() +
		memstats.other_sys.load()

	// Calculate memory allocator stats.
	// During program execution we only count number of frees and amount of freed memory.
	// Current number of alive objects in the heap and amount of alive heap memory
	// are calculated by scanning all spans.
	// Total number of mallocs is calculated as number of frees plus number of alive objects.
	// Similarly, total amount of allocated memory is calculated as amount of freed memory
	// plus amount of alive heap memory.
	memstats.alloc = 0
	memstats.total_alloc = 0
	memstats.nmalloc = 0
	memstats.nfree = 0
	for i := 0; i < len(memstats.by_size); i++ {
		memstats.by_size[i].nmalloc = 0
		memstats.by_size[i].nfree = 0
	}

	// Collect consistent stats, which are the source-of-truth in some cases.
	var consStats heapStatsDelta
	memstats.heapStats.unsafeRead(&consStats)

	// Collect large allocation stats.
	totalAlloc := uint64(consStats.largeAlloc)
	memstats.nmalloc += uint64(consStats.largeAllocCount)
	totalFree := uint64(consStats.largeFree)
	memstats.nfree += uint64(consStats.largeFreeCount)

	// Collect per-sizeclass stats.
	for i := 0; i < _NumSizeClasses; i++ {
		// Malloc stats.
		a := uint64(consStats.smallAllocCount[i])
		totalAlloc += a * uint64(class_to_size[i])
		memstats.nmalloc += a
		memstats.by_size[i].nmalloc = a

		// Free stats.
		f := uint64(consStats.smallFreeCount[i])
		totalFree += f * uint64(class_to_size[i])
		memstats.nfree += f
		memstats.by_size[i].nfree = f
	}

	// Account for tiny allocations.
	memstats.nfree += memstats.tinyallocs
	memstats.nmalloc += memstats.tinyallocs

	// Calculate derived stats.
	memstats.total_alloc = totalAlloc
	memstats.alloc = totalAlloc - totalFree
	memstats.heap_objects = memstats.nmalloc - memstats.nfree

	memstats.stacks_inuse = uint64(consStats.inStacks)
	memstats.gcWorkBufInUse = uint64(consStats.inWorkBufs)
	memstats.gcProgPtrScalarBitsInUse = uint64(consStats.inPtrScalarBits)

	// We also count stacks_inuse, gcWorkBufInUse, and gcProgPtrScalarBitsInUse as sys memory.
	memstats.sys += memstats.stacks_inuse + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse

	// The world is stopped, so the consistent stats (after aggregation)
	// should be identical to some combination of memstats. In particular:
	//
	// * heap_inuse == inHeap
	// * heap_released == released
	// * heap_sys - heap_released == committed - inStacks - inWorkBufs - inPtrScalarBits
	//
	// Check if that's actually true.
	//
	// TODO(mknyszek): Maybe don't throw here. It would be bad if a
	// bug in otherwise benign accounting caused the whole application
	// to crash.
	if memstats.heap_inuse != uint64(consStats.inHeap) {
		print("runtime: heap_inuse=", memstats.heap_inuse, "\n")
		print("runtime: consistent value=", consStats.inHeap, "\n")
		throw("heap_inuse and consistent stats are not equal")
	}
	if memstats.heap_released != uint64(consStats.released) {
		print("runtime: heap_released=", memstats.heap_released, "\n")
		print("runtime: consistent value=", consStats.released, "\n")
		throw("heap_released and consistent stats are not equal")
	}
	globalRetained := memstats.heap_sys.load() - memstats.heap_released
	consRetained := uint64(consStats.committed - consStats.inStacks - consStats.inWorkBufs - consStats.inPtrScalarBits)
	if globalRetained != consRetained {
		print("runtime: global value=", globalRetained, "\n")
		print("runtime: consistent value=", consRetained, "\n")
		throw("measures of the retained heap are not equal")
	}
}

// flushmcache flushes the mcache of allp[i].
//
// The world must be stopped.
//
//go:nowritebarrier
func flushmcache(i int) {
	assertWorldStopped()

	p := allp[i]
	c := p.mcache
	if c == nil {
		return
	}
	c.releaseAll()
	stackcache_clear(c)
}

// flushallmcaches flushes the mcaches of all Ps.
//
// The world must be stopped.
//
//go:nowritebarrier
func flushallmcaches() {
	assertWorldStopped()

	for i := 0; i < int(gomaxprocs); i++ {
		flushmcache(i)
	}
}

// sysMemStat represents a global system statistic that is managed atomically.
//
// This type must structurally be a uint64 so that mstats aligns with MemStats.
type sysMemStat uint64

// load atomically reads the value of the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//go:nosplit
func (s *sysMemStat) load() uint64 {
	return atomic.Load64((*uint64)(s))
}

// add atomically adds the sysMemStat by n.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//go:nosplit
func (s *sysMemStat) add(n int64) {
	if s == nil {
		return
	}
	val := atomic.Xadd64((*uint64)(s), n)
	if (n > 0 && int64(val) < n) || (n < 0 && int64(val)+n < n) {
		print("runtime: val=", val, " n=", n, "\n")
		throw("sysMemStat overflow")
	}
}
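
// A hedged sketch of how sysMemStat values are typically maintained by
// memory management code elsewhere in the runtime (illustrative only; the
// exact call sites vary):
//
//	memstats.heap_sys.add(int64(n))  // after obtaining n bytes from the OS for the heap
//	memstats.heap_sys.add(-int64(n)) // after repurposing n bytes away from the heap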

// heapStatsDelta contains deltas of various runtime memory statistics
// that need to be updated together in order for them to be kept
// consistent with one another.
type heapStatsDelta struct {
	// Memory stats.
	committed       int64 // byte delta of memory committed
	released        int64 // byte delta of released memory generated
	inHeap          int64 // byte delta of memory placed in the heap
	inStacks        int64 // byte delta of memory reserved for stacks
	inWorkBufs      int64 // byte delta of memory reserved for work bufs
	inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits

	// Allocator stats.
	largeAlloc      uintptr                  // bytes allocated for large objects
	largeAllocCount uintptr                  // number of large object allocations
	smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects
	largeFree       uintptr                  // bytes freed for large objects (>maxSmallSize)
	largeFreeCount  uintptr                  // number of frees for large objects (>maxSmallSize)
	smallFreeCount  [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize)

	// Add a uint32 to ensure this struct is a multiple of 8 bytes in size.
	// Only necessary on 32-bit platforms.
	// _ [(sys.PtrSize / 4) % 2]uint32
}

// merge adds in the deltas from b into a.
func (a *heapStatsDelta) merge(b *heapStatsDelta) {
	a.committed += b.committed
	a.released += b.released
	a.inHeap += b.inHeap
	a.inStacks += b.inStacks
	a.inWorkBufs += b.inWorkBufs
	a.inPtrScalarBits += b.inPtrScalarBits

	a.largeAlloc += b.largeAlloc
	a.largeAllocCount += b.largeAllocCount
	for i := range b.smallAllocCount {
		a.smallAllocCount[i] += b.smallAllocCount[i]
	}
	a.largeFree += b.largeFree
	a.largeFreeCount += b.largeFreeCount
	for i := range b.smallFreeCount {
		a.smallFreeCount[i] += b.smallFreeCount[i]
	}
}

// consistentHeapStats represents a set of various memory statistics
// whose updates must be viewed completely to get a consistent
// state of the world.
//
// To write updates to memory stats use the acquire and release
// methods. To obtain a consistent global snapshot of these statistics,
// use read.
type consistentHeapStats struct {
	// stats is a ring buffer of heapStatsDelta values.
	// Writers always atomically update the delta at index gen.
	//
	// Readers operate by rotating gen (0 -> 1 -> 2 -> 0 -> ...)
	// and synchronizing with writers by observing each P's
	// statsSeq field. If the reader observes a P not writing,
	// it can be sure that it will pick up the new gen value the
	// next time it writes.
	//
	// The reader then takes responsibility by clearing space
	// in the ring buffer for the next reader to rotate gen to
	// that space (i.e. it merges in values from index (gen-2) mod 3
	// to index (gen-1) mod 3, then clears the former).
	//
	// Note that this means only one reader can be reading at a time.
	// There is no way for readers to synchronize.
	//
	// This process is why we need a ring buffer of size 3 instead
	// of 2: one is for the writers, one contains the most recent
	// data, and the last one is clear so writers can begin writing
	// to it the moment gen is updated.
	stats [3]heapStatsDelta

	// gen represents the current index into which writers
	// are writing, and can take on the value of 0, 1, or 2.
	// This value is updated atomically.
	gen uint32

	// noPLock is intended to provide mutual exclusion for updating
	// stats when no P is available. It does not block other writers
	// with a P, only other writers without a P and the reader. Because
	// stats are usually updated when a P is available, contention on
	// this lock should be minimal.
	noPLock mutex
}

// acquire returns a heapStatsDelta to be updated. In effect,
// it acquires the shard for writing. release must be called
// as soon as the relevant deltas are updated.
//
// The returned heapStatsDelta must be updated atomically.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
func (m *consistentHeapStats) acquire() *heapStatsDelta {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 == 0 {
			// Should have been incremented to odd.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		lock(&m.noPLock)
	}
	gen := atomic.Load(&m.gen) % 3
	return &m.stats[gen]
}

// release indicates that the writer is done modifying
// the delta. The value returned by the corresponding
// acquire must no longer be accessed or modified after
// release is called.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
func (m *consistentHeapStats) release() {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 != 0 {
			// Should have been incremented to even.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		unlock(&m.noPLock)
	}
}
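
// A hedged sketch of the writer-side pattern described above (illustrative;
// the real call sites live in the allocator and span management code):
//
//	stats := memstats.heapStats.acquire()
//	atomic.Xadduintptr(&stats.largeAllocCount, 1) // deltas must be updated atomically
//	memstats.heapStats.release()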

// unsafeRead aggregates the delta for this shard into out.
//
// Unsafe because it does so without any synchronization. The
// world must be stopped.
func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) {
	assertWorldStopped()

	for i := range m.stats {
		out.merge(&m.stats[i])
	}
}

// unsafeClear clears the shard.
//
// Unsafe because the world must be stopped and values should
// be donated elsewhere before clearing.
func (m *consistentHeapStats) unsafeClear() {
	assertWorldStopped()

	for i := range m.stats {
		m.stats[i] = heapStatsDelta{}
	}
}

// read takes a globally consistent snapshot of m
// and puts the aggregated value in out. Even though out is a
// heapStatsDelta, the resulting values should be complete and
// valid statistic values.
//
// Not safe to call concurrently. The world must be stopped
// or metricsSema must be held.
func (m *consistentHeapStats) read(out *heapStatsDelta) {
	// Getting preempted after this point is not safe because
	// we read allp. We need to make sure a STW can't happen
	// so it doesn't change out from under us.
	mp := acquirem()

	// Get the current generation. We can be confident that this
	// will not change since read is serialized and is the only
	// one that modifies currGen.
	currGen := atomic.Load(&m.gen)
	prevGen := currGen - 1
	if currGen == 0 {
		prevGen = 2
	}

	// Prevent writers without a P from writing while we update gen.
	lock(&m.noPLock)

	// Rotate gen, effectively taking a snapshot of the state of
	// these statistics at the point of the exchange by moving
	// writers to the next set of deltas.
	//
	// This exchange is safe to do because we won't race
	// with anyone else trying to update this value.
	atomic.Xchg(&m.gen, (currGen+1)%3)

	// Allow P-less writers to continue. They'll be writing to the
	// next generation now.
	unlock(&m.noPLock)

	for _, p := range allp {
		// Spin until there are no more writers.
		for atomic.Load(&p.statsSeq)%2 != 0 {
		}
	}

	// At this point we've observed that each sequence
	// number is even, so any future writers will observe
	// the new gen value. That means it's safe to read from
	// the other deltas in the stats buffer.

	// Perform our responsibilities and free up
	// stats[prevGen] for the next time we want to take
	// a snapshot.
	m.stats[currGen].merge(&m.stats[prevGen])
	m.stats[prevGen] = heapStatsDelta{}

	// Finally, copy out the complete delta.
	*out = m.stats[currGen]

	releasem(mp)
}
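
// A hedged sketch of the reader side (illustrative; the runtime/metrics
// implementation consumes these statistics in a similar way while holding
// metricsSema):
//
//	var delta heapStatsDelta
//	memstats.heapStats.read(&delta)
//	// delta now holds complete, mutually consistent values.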