Commit 8fc6ed4

sync: less aggressive local caching in Pool
Currently Pool can cache up to 15 elements per P, and these elements are
not accessible to other Ps. If a Pool caches large objects, say 2MB each,
and GOMAXPROCS is set to a large value, say 32, then the Pool can waste
up to 960MB (15 elements * 2MB * 32 Ps).

The new caching policy caches at most one per-P element; the rest is
shared between Ps. Get/Put performance is unchanged. Nested Get/Put
performance is 57% worse. However, overall scalability of nested Get/Put
is significantly improved, so the new policy starts winning under
contention.

benchmark                    old ns/op    new ns/op    delta
BenchmarkPool                27.4         26.7          -2.55%
BenchmarkPool-4              6.63         6.59          -0.60%
BenchmarkPool-16             1.98         1.87          -5.56%
BenchmarkPool-64             1.93         1.86          -3.63%
BenchmarkPoolOverlflow       3970         6235         +57.05%
BenchmarkPoolOverlflow-4     10935        1668         -84.75%
BenchmarkPoolOverlflow-16    13419        520          -96.12%
BenchmarkPoolOverlflow-64    10295        380          -96.31%

LGTM=rsc
R=rsc
CC=golang-codereviews, khr
https://golang.org/cl/86020043
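To make the waste concrete, here is a minimal sketch (not part of this commit) of the kind of workload the old policy hurt; GOMAXPROCS and the object size mirror the numbers above, and the exact retention depends on scheduling and GC timing:

package main

import (
	"runtime"
	"sync"
)

func main() {
	runtime.GOMAXPROCS(32) // many Ps, as in the example above

	pool := sync.Pool{
		New: func() interface{} { return make([]byte, 2<<20) }, // 2MB objects
	}

	var wg sync.WaitGroup
	for g := 0; g < 64; g++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for i := 0; i < 1000; i++ {
				buf := pool.Get().([]byte)
				buf[0] = 1 // pretend to use the buffer
				pool.Put(buf)
			}
		}()
	}
	wg.Wait()
	// Old policy: each of the 32 Ps could privately retain up to 15
	// buffers that no other P could reach: 15 * 2MB * 32 = 960MB.
	// New policy: at most one private element per P; the rest is shared.
}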
1 parent: 1e1506a

3 files changed (+92, -117 lines)

src/pkg/runtime/mgc0.c

Lines changed: 5 additions & 23 deletions
@@ -91,42 +91,24 @@ enum {
 // Initialized from $GOGC. GOGC=off means no gc.
 static int32 gcpercent = GcpercentUnknown;
 
-static struct
-{
-	Lock;
-	void*	head;
-} pools;
+static FuncVal* poolcleanup;
 
 void
-sync·runtime_registerPool(void **p)
+sync·runtime_registerPoolCleanup(FuncVal *f)
 {
-	runtime·lock(&pools);
-	p[0] = pools.head;
-	pools.head = p;
-	runtime·unlock(&pools);
+	poolcleanup = f;
 }
 
 static void
 clearpools(void)
 {
-	void **pool, **next;
 	P *p, **pp;
 	MCache *c;
-	uintptr off;
 	int32 i;
 
 	// clear sync.Pool's
-	for(pool = pools.head; pool != nil; pool = next) {
-		next = pool[0];
-		pool[0] = nil; // next
-		pool[1] = nil; // local
-		pool[2] = nil; // localSize
-		off = (uintptr)pool[3] / sizeof(void*);
-		pool[off+0] = nil; // global slice
-		pool[off+1] = nil;
-		pool[off+2] = nil;
-	}
-	pools.head = nil;
+	if(poolcleanup != nil)
+		reflect·call(poolcleanup, nil, 0, 0);
 
 	for(pp=runtime·allp; p=*pp; pp++) {
 		// clear tinyalloc pool
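With this change the runtime no longer knows the Pool's field layout; at GC time it just invokes one opaque callback. A minimal plain-Go sketch of that registration pattern (illustrative only; the real hook is the unexported sync·runtime_registerPoolCleanup, not a public API):

package main

import "fmt"

// cleanup stands in for the runtime's poolcleanup FuncVal: a single
// opaque callback instead of a linked list of Pools with known layout.
var cleanup func()

// registerCleanup mirrors sync·runtime_registerPoolCleanup.
func registerCleanup(f func()) { cleanup = f }

// clearPools mirrors the GC's clearpools(): it calls the callback, if
// any, without knowing anything about sync.Pool's internals.
func clearPools() {
	if cleanup != nil {
		cleanup()
	}
}

func main() {
	registerCleanup(func() { fmt.Println("pools cleared") })
	clearPools() // prints "pools cleared"
}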

src/pkg/sync/pool.go

Lines changed: 84 additions & 91 deletions
@@ -10,12 +10,6 @@ import (
 	"unsafe"
 )
 
-const (
-	cacheLineSize = 128
-	poolLocalSize = 2 * cacheLineSize
-	poolLocalCap  = poolLocalSize/unsafe.Sizeof(*(*interface{})(nil)) - 1
-)
-
 // A Pool is a set of temporary objects that may be individually saved and
 // retrieved.
 //
@@ -46,36 +40,21 @@ const (
 // free list.
 //
 type Pool struct {
-	// The following fields are known to runtime.
-	next         *Pool      // for use by runtime
-	local        *poolLocal // local fixed-size per-P pool, actually an array
-	localSize    uintptr    // size of the local array
-	globalOffset uintptr    // offset of global
-	// The rest is not known to runtime.
+	local     unsafe.Pointer // local fixed-size per-P pool, actual type is [P]poolLocal
+	localSize uintptr        // size of the local array
 
 	// New optionally specifies a function to generate
 	// a value when Get would otherwise return nil.
 	// It may not be changed concurrently with calls to Get.
 	New func() interface{}
-
-	pad [cacheLineSize]byte
-	// Read-mostly date above this point, mutable data follows.
-	mu     Mutex
-	global []interface{} // global fallback pool
 }
 
 // Local per-P Pool appendix.
 type poolLocal struct {
-	tail   int
-	unused int
-	buf    [poolLocalCap]interface{}
-}
-
-func init() {
-	var v poolLocal
-	if unsafe.Sizeof(v) != poolLocalSize {
-		panic("sync: incorrect pool size")
-	}
+	private interface{}   // Can be used only by the respective P.
+	shared  []interface{} // Can be used by any P.
+	Mutex                 // Protects shared.
+	pad     [128]byte     // Prevents false sharing.
 }
 
 // Put adds x to the pool.
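The pad field exists so that adjacent poolLocal slots in the [P]poolLocal array do not share cache lines. A hedged size check using a mirror of the struct (the real type is unexported in package sync; field layout assumed from the diff above):

package main

import (
	"fmt"
	"sync"
	"unsafe"
)

// localSlot mirrors the new poolLocal for inspection only.
type localSlot struct {
	private interface{}
	shared  []interface{}
	sync.Mutex
	pad [128]byte
}

func main() {
	// Each per-P slot spans well over two 64-byte cache lines, so Ps
	// updating neighboring slots of the [P]poolLocal array never write
	// to the same cache line.
	fmt.Println(unsafe.Sizeof(localSlot{})) // 176 on 64-bit platforms
}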
@@ -90,14 +69,17 @@ func (p *Pool) Put(x interface{}) {
 		return
 	}
 	l := p.pin()
-	t := l.tail
-	if t < int(poolLocalCap) {
-		l.buf[t] = x
-		l.tail = t + 1
-		runtime_procUnpin()
+	if l.private == nil {
+		l.private = x
+		x = nil
+	}
+	runtime_procUnpin()
+	if x == nil {
 		return
 	}
-	p.putSlow(l, x)
+	l.Lock()
+	l.shared = append(l.shared, x)
+	l.Unlock()
 }
 
 // Get selects an arbitrary item from the Pool, removes it from the
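Put is now two-tiered: while pinned it tries the lock-free private slot, and only overflow takes the per-P mutex. A standalone sketch of the same pattern with a hypothetical type (not from the commit):

package main

import (
	"fmt"
	"sync"
)

// twoTier mimics one P's poolLocal: a single private slot plus a
// mutex-protected overflow list that other Ps could also drain.
type twoTier struct {
	private interface{}
	mu      sync.Mutex
	shared  []interface{}
}

func (t *twoTier) put(x interface{}) {
	if t.private == nil { // fast path: no lock while "pinned"
		t.private = x
		return
	}
	t.mu.Lock() // slow path: overflow into the shared list
	t.shared = append(t.shared, x)
	t.mu.Unlock()
}

func main() {
	var t twoTier
	t.put("a")
	t.put("b")
	fmt.Println(t.private, t.shared) // a [b]
}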
@@ -116,69 +98,49 @@ func (p *Pool) Get() interface{} {
 		return nil
 	}
 	l := p.pin()
-	t := l.tail
-	if t > 0 {
-		t -= 1
-		x := l.buf[t]
-		l.tail = t
-		runtime_procUnpin()
+	x := l.private
+	l.private = nil
+	runtime_procUnpin()
+	if x != nil {
 		return x
 	}
-	return p.getSlow()
-}
-
-func (p *Pool) putSlow(l *poolLocal, x interface{}) {
-	// Grab half of items from local pool and put to global pool.
-	// Can not lock the mutex while pinned.
-	const N = int(poolLocalCap/2 + 1)
-	var buf [N]interface{}
-	buf[0] = x
-	for i := 1; i < N; i++ {
-		l.tail--
-		buf[i] = l.buf[l.tail]
+	l.Lock()
+	last := len(l.shared) - 1
+	if last >= 0 {
+		x = l.shared[last]
+		l.shared = l.shared[:last]
 	}
-	runtime_procUnpin()
-
-	p.mu.Lock()
-	p.global = append(p.global, buf[:]...)
-	p.mu.Unlock()
+	l.Unlock()
+	if x != nil {
+		return x
+	}
+	return p.getSlow()
 }
 
 func (p *Pool) getSlow() (x interface{}) {
-	// Grab a batch of items from global pool and put to local pool.
-	// Can not lock the mutex while pinned.
-	runtime_procUnpin()
-	p.mu.Lock()
+	// See the comment in pin regarding ordering of the loads.
+	size := atomic.LoadUintptr(&p.localSize) // load-acquire
+	local := p.local                         // load-consume
+	// Try to steal one element from other procs.
 	pid := runtime_procPin()
-	s := p.localSize
-	l := p.local
-	if uintptr(pid) < s {
-		l = indexLocal(l, pid)
-		// Get the item to return.
-		last := len(p.global) - 1
+	runtime_procUnpin()
+	for i := 0; i < int(size); i++ {
+		l := indexLocal(local, (pid+i+1)%int(size))
+		l.Lock()
+		last := len(l.shared) - 1
 		if last >= 0 {
-			x = p.global[last]
-			p.global = p.global[:last]
-		}
-		// Try to refill local pool, we may have been rescheduled to another P.
-		if last > 0 && l.tail == 0 {
-			n := int(poolLocalCap / 2)
-			gl := len(p.global)
-			if n > gl {
-				n = gl
-			}
-			copy(l.buf[:], p.global[gl-n:])
-			p.global = p.global[:gl-n]
-			l.tail = n
+			x = l.shared[last]
+			l.shared = l.shared[:last]
+			l.Unlock()
+			break
 		}
+		l.Unlock()
 	}
-	runtime_procUnpin()
-	p.mu.Unlock()
 
 	if x == nil && p.New != nil {
 		x = p.New()
 	}
-	return
+	return x
 }
 
 // pin pins the current goroutine to P, disables preemption and returns poolLocal pool for the P.
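getSlow walks the other Ps' shared lists in ring order, starting with the neighbor after the current P and finishing with the caller's own slot. A tiny standalone sketch of the index walk with made-up values:

package main

import "fmt"

func main() {
	const size = 4 // pretend GOMAXPROCS(0) == 4
	pid := 2       // pretend the caller was pinned to P2
	// Same traversal as getSlow: each P exactly once, beginning with
	// the right-hand neighbor and ending with the caller's own slot.
	for i := 0; i < size; i++ {
		fmt.Print((pid+i+1)%size, " ") // 3 0 1 2
	}
	fmt.Println()
}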
@@ -199,32 +161,63 @@ func (p *Pool) pin() *poolLocal {
 
 func (p *Pool) pinSlow() *poolLocal {
 	// Retry under the mutex.
+	// Can not lock the mutex while pinned.
 	runtime_procUnpin()
-	p.mu.Lock()
-	defer p.mu.Unlock()
+	allPoolsMu.Lock()
+	defer allPoolsMu.Unlock()
 	pid := runtime_procPin()
+	// poolCleanup won't be called while we are pinned.
 	s := p.localSize
 	l := p.local
 	if uintptr(pid) < s {
 		return indexLocal(l, pid)
 	}
 	if p.local == nil {
-		p.globalOffset = unsafe.Offsetof(p.global)
-		runtime_registerPool(p)
+		allPools = append(allPools, p)
 	}
 	// If GOMAXPROCS changes between GCs, we re-allocate the array and lose the old one.
 	size := runtime.GOMAXPROCS(0)
 	local := make([]poolLocal, size)
-	atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(&p.local)), unsafe.Pointer(&local[0])) // store-release
-	atomic.StoreUintptr(&p.localSize, uintptr(size))                                            // store-release
+	atomic.StorePointer((*unsafe.Pointer)(&p.local), unsafe.Pointer(&local[0])) // store-release
+	atomic.StoreUintptr(&p.localSize, uintptr(size))                            // store-release
 	return &local[pid]
 }
 
-func indexLocal(l *poolLocal, i int) *poolLocal {
-	return (*poolLocal)(unsafe.Pointer(uintptr(unsafe.Pointer(l)) + unsafe.Sizeof(*l)*uintptr(i))) // uh...
+func poolCleanup() {
+	// This function is called with the world stopped, at the beginning of a garbage collection.
+	// It must not allocate and probably should not call any runtime functions.
+	// Defensively zero out everything, 2 reasons:
+	// 1. To prevent false retention of whole Pools.
+	// 2. If GC happens while a goroutine works with l.shared in Put/Get,
+	//    it will retain whole Pool. So next cycle memory consumption would be doubled.
+	for i, p := range allPools {
+		allPools[i] = nil
+		for i := 0; i < int(p.localSize); i++ {
+			l := indexLocal(p.local, i)
+			l.private = nil
+			for j := range l.shared {
+				l.shared[j] = nil
+			}
+			l.shared = nil
+		}
+	}
+	allPools = []*Pool{}
+}
+
+var (
+	allPoolsMu Mutex
+	allPools   []*Pool
+)
+
+func init() {
+	runtime_registerPoolCleanup(poolCleanup)
+}
+
+func indexLocal(l unsafe.Pointer, i int) *poolLocal {
+	return &(*[1000000]poolLocal)(l)[i]
 }
 
 // Implemented in runtime.
-func runtime_registerPool(*Pool)
+func runtime_registerPoolCleanup(cleanup func())
 func runtime_procPin() int
 func runtime_procUnpin()
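The new indexLocal casts the raw pointer to a pointer to a huge array so that plain indexing performs the address arithmetic the old uintptr version spelled out by hand (the one marked "// uh..."). A hedged demonstration of the same idiom with ints (sizes illustrative; only elements backed by the real allocation may be touched):

package main

import (
	"fmt"
	"unsafe"
)

func main() {
	s := []int{10, 20, 30, 40}
	p := unsafe.Pointer(&s[0]) // analogous to what Pool.local stores
	// Reinterpret p as a pointer to an enormous array; indexing it
	// compiles down to base + i*elemsize.
	fmt.Println((*[1 << 20]int)(p)[2]) // 30
}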

src/pkg/sync/pool_test.go

Lines changed: 3 additions & 3 deletions
@@ -25,12 +25,12 @@ func TestPool(t *testing.T) {
 	}
 	p.Put("a")
 	p.Put("b")
-	if g := p.Get(); g != "b" {
-		t.Fatalf("got %#v; want b", g)
-	}
 	if g := p.Get(); g != "a" {
 		t.Fatalf("got %#v; want a", g)
 	}
+	if g := p.Get(); g != "b" {
+		t.Fatalf("got %#v; want b", g)
+	}
 	if g := p.Get(); g != nil {
 		t.Fatalf("got %#v; want nil", g)
 	}
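The swapped expectations follow directly from the new policy: the first Put fills the P's private slot, the second goes to the shared list, and Get drains the private slot first. A hedged standalone check (assumes both Gets run on the same P with no GC in between):

package main

import (
	"fmt"
	"sync"
)

func main() {
	var p sync.Pool
	p.Put("a")           // fills the per-P private slot
	p.Put("b")           // slot taken, goes to the shared list
	fmt.Println(p.Get()) // "a": the private slot is checked first
	fmt.Println(p.Get()) // "b": then the shared list
}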
