
Commit daaf29c

dvyukov authored and rsc committed
sync: improve RWMutex performance
The new implementation features a wait-free fast path for readers, which significantly improves performance/scalability on read-mostly workloads.

Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz) are as follows:

benchmark                          old ns/op    new ns/op    delta
BenchmarkRWMutexUncontended           179.00        96.60  -46.03%
BenchmarkRWMutexUncontended-2          89.10        49.10  -44.89%
BenchmarkRWMutexUncontended-4          44.70        24.70  -44.74%
BenchmarkRWMutexUncontended-8          23.30        12.90  -44.64%
BenchmarkRWMutexUncontended-16         16.80         8.75  -47.92%
BenchmarkRWMutexWrite100               79.60        26.80  -66.33%
BenchmarkRWMutexWrite100-2            305.00        33.00  -89.18%
BenchmarkRWMutexWrite100-4            245.00       113.00  -53.88%
BenchmarkRWMutexWrite100-8            330.00       147.00  -55.45%
BenchmarkRWMutexWrite100-16           371.00       152.00  -59.03%
BenchmarkRWMutexWrite10                78.30        29.80  -61.94%
BenchmarkRWMutexWrite10-2             348.00       165.00  -52.59%
BenchmarkRWMutexWrite10-4             447.00       199.00  -55.48%
BenchmarkRWMutexWrite10-8             564.00       180.00  -68.09%
BenchmarkRWMutexWrite10-16            492.00       192.00  -60.98%
BenchmarkRWMutexWorkWrite100         1077.00      1037.00   -3.71%
BenchmarkRWMutexWorkWrite100-2        659.00       596.00   -9.56%
BenchmarkRWMutexWorkWrite100-4        509.00       361.00  -29.08%
BenchmarkRWMutexWorkWrite100-8        603.00       351.00  -41.79%
BenchmarkRWMutexWorkWrite100-16       750.00       607.00  -19.07%
BenchmarkRWMutexWorkWrite10           990.00       951.00   -3.94%
BenchmarkRWMutexWorkWrite10-2        1119.00      1070.00   -4.38%
BenchmarkRWMutexWorkWrite10-4        1300.00      1199.00   -7.77%
BenchmarkRWMutexWorkWrite10-8        1424.00      1291.00   -9.34%
BenchmarkRWMutexWorkWrite10-16       1981.00      1786.00   -9.84%

R=rsc
CC=golang-dev
https://golang.org/cl/4671051
1 parent 4c63129 commit daaf29c
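The read-mostly workload this change targets looks roughly like the sketch below (illustrative only, not part of the commit; the cache type and its method names are made up). Many goroutines call Get concurrently and, with this commit, each RLock/RUnlock on the uncontended path is a single atomic add; the occasional Set still takes the exclusive lock.

package main

import "sync"

// cache is a read-mostly map guarded by an RWMutex: lookups share the
// read lock, updates take the exclusive write lock.
type cache struct {
	mu sync.RWMutex
	m  map[string]string
}

func (c *cache) Get(k string) (string, bool) {
	c.mu.RLock() // fast path: one atomic add when no writer is pending
	defer c.mu.RUnlock()
	v, ok := c.m[k]
	return v, ok
}

func (c *cache) Set(k, v string) {
	c.mu.Lock() // excludes readers and other writers
	defer c.mu.Unlock()
	c.m[k] = v
}

func main() {
	c := &cache{m: make(map[string]string)}
	c.Set("answer", "42")
	if v, ok := c.Get("answer"); ok {
		_ = v
	}
}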

2 files changed: 118 additions, 30 deletions

src/pkg/sync/rwmutex.go (37 additions, 30 deletions)

@@ -4,53 +4,46 @@
 
 package sync
 
-import "sync/atomic"
+import (
+	"runtime"
+	"sync/atomic"
+)
 
 // An RWMutex is a reader/writer mutual exclusion lock.
 // The lock can be held by an arbitrary number of readers
 // or a single writer.
 // RWMutexes can be created as part of other
 // structures; the zero value for a RWMutex is
 // an unlocked mutex.
-//
-// Writers take priority over Readers: no new RLocks
-// are granted while a blocked Lock call is waiting.
 type RWMutex struct {
-	w           Mutex // held if there are pending readers or writers
-	r           Mutex // held if the w is being rd
-	readerCount int32 // number of pending readers
+	w           Mutex  // held if there are pending writers
+	writerSem   uint32 // semaphore for writers to wait for completing readers
+	readerSem   uint32 // semaphore for readers to wait for completing writers
+	readerCount int32  // number of pending readers
+	readerWait  int32  // number of departing readers
 }
 
+const rwmutexMaxReaders = 1 << 30
+
 // RLock locks rw for reading.
-// If the lock is already locked for writing or there is a writer already waiting
-// to release the lock, RLock blocks until the writer has released the lock.
 func (rw *RWMutex) RLock() {
-	// Use rw.r.Lock() to block granting the RLock if a goroutine
-	// is waiting for its Lock. This is the prevent starvation of W in
-	// this situation:
-	//   A: rw.RLock() // granted
-	//   W: rw.Lock()  // waiting for rw.w().Lock()
-	//   B: rw.RLock() // granted
-	//   C: rw.RLock() // granted
-	//   B: rw.RUnlock()
-	//   ... (new readers come and go indefinitely, W is starving)
-	rw.r.Lock()
-	if atomic.AddInt32(&rw.readerCount, 1) == 1 {
-		// The first reader locks rw.w, so writers will be blocked
-		// while the readers have the RLock.
-		rw.w.Lock()
+	if atomic.AddInt32(&rw.readerCount, 1) < 0 {
+		// A writer is pending, wait for it.
+		runtime.Semacquire(&rw.readerSem)
 	}
-	rw.r.Unlock()
 }
 
 // RUnlock undoes a single RLock call;
 // it does not affect other simultaneous readers.
 // It is a run-time error if rw is not locked for reading
 // on entry to RUnlock.
 func (rw *RWMutex) RUnlock() {
-	if atomic.AddInt32(&rw.readerCount, -1) == 0 {
-		// last reader finished, enable writers
-		rw.w.Unlock()
+	if atomic.AddInt32(&rw.readerCount, -1) < 0 {
+		// A writer is pending.
+		if atomic.AddInt32(&rw.readerWait, -1) == 0 {
+			// The last reader unblocks the writer.
+			runtime.Semrelease(&rw.writerSem)
+		}
 	}
 }
 
@@ -61,9 +54,14 @@ func (rw *RWMutex) RUnlock() {
 // a blocked Lock call excludes new readers from acquiring
 // the lock.
 func (rw *RWMutex) Lock() {
-	rw.r.Lock()
+	// First, resolve competition with other writers.
 	rw.w.Lock()
-	rw.r.Unlock()
+	// Announce to readers there is a pending writer.
+	r := atomic.AddInt32(&rw.readerCount, -rwmutexMaxReaders) + rwmutexMaxReaders
+	// Wait for active readers.
+	if r != 0 && atomic.AddInt32(&rw.readerWait, r) != 0 {
+		runtime.Semacquire(&rw.writerSem)
+	}
 }
 
 // Unlock unlocks rw for writing. It is a run-time error if rw is
@@ -72,7 +70,16 @@ func (rw *RWMutex) Lock() {
 // As with Mutexes, a locked RWMutex is not associated with a particular
 // goroutine. One goroutine may RLock (Lock) an RWMutex and then
 // arrange for another goroutine to RUnlock (Unlock) it.
-func (rw *RWMutex) Unlock() { rw.w.Unlock() }
+func (rw *RWMutex) Unlock() {
+	// Announce to readers there is no active writer.
+	r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders)
+	// Unblock blocked readers, if any.
+	for i := 0; i < int(r); i++ {
+		runtime.Semrelease(&rw.readerSem)
+	}
+	// Allow other writers to proceed.
+	rw.w.Unlock()
+}
 
 // RLocker returns a Locker interface that implements
 // the Lock and Unlock methods by calling rw.RLock and rw.RUnlock.
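The key trick above is the rwmutexMaxReaders bias on readerCount: a pending writer subtracts 1<<30 from the counter, driving it negative for any realistic number of concurrent readers, so RLock and RUnlock detect a pending writer from the sign of a single atomic add. A standalone sketch of the arithmetic (illustrative only, not the sync package internals):

package main

import (
	"fmt"
	"sync/atomic"
)

const maxReaders = 1 << 30 // same bias as rwmutexMaxReaders

func main() {
	var readerCount int32

	// Two readers arrive: the counter becomes 1, then 2 (both >= 0, so
	// RLock's fast path would proceed without blocking).
	atomic.AddInt32(&readerCount, 1)
	atomic.AddInt32(&readerCount, 1)

	// A writer announces itself by subtracting the bias. Adding the bias
	// back recovers the number of active readers it must wait for.
	r := atomic.AddInt32(&readerCount, -maxReaders) + maxReaders
	fmt.Println("active readers to wait for:", r) // 2

	// A reader arriving now sees a negative counter and would block on
	// readerSem instead of entering the critical section.
	fmt.Println("late reader sees pending writer:", atomic.AddInt32(&readerCount, 1) < 0) // true
}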

src/pkg/sync/rwmutex_test.go (81 additions, 0 deletions)
@@ -154,3 +154,84 @@ func TestRLocker(t *testing.T) {
 		wl.Unlock()
 	}
 }
+
+func BenchmarkRWMutexUncontended(b *testing.B) {
+	type PaddedRWMutex struct {
+		RWMutex
+		pad [32]uint32
+	}
+	const CallsPerSched = 1000
+	procs := runtime.GOMAXPROCS(-1)
+	N := int32(b.N / CallsPerSched)
+	c := make(chan bool, procs)
+	for p := 0; p < procs; p++ {
+		go func() {
+			var rwm PaddedRWMutex
+			for atomic.AddInt32(&N, -1) >= 0 {
+				runtime.Gosched()
+				for g := 0; g < CallsPerSched; g++ {
+					rwm.RLock()
+					rwm.RLock()
+					rwm.RUnlock()
+					rwm.RUnlock()
+					rwm.Lock()
+					rwm.Unlock()
+				}
+			}
+			c <- true
+		}()
+	}
+	for p := 0; p < procs; p++ {
+		<-c
+	}
+}
+
+func benchmarkRWMutex(b *testing.B, localWork, writeRatio int) {
+	const CallsPerSched = 1000
+	procs := runtime.GOMAXPROCS(-1)
+	N := int32(b.N / CallsPerSched)
+	c := make(chan bool, procs)
+	var rwm RWMutex
+	for p := 0; p < procs; p++ {
+		go func() {
+			foo := 0
+			for atomic.AddInt32(&N, -1) >= 0 {
+				runtime.Gosched()
+				for g := 0; g < CallsPerSched; g++ {
+					foo++
+					if foo%writeRatio == 0 {
+						rwm.Lock()
+						rwm.Unlock()
+					} else {
+						rwm.RLock()
+						for i := 0; i != localWork; i += 1 {
+							foo *= 2
+							foo /= 2
+						}
+						rwm.RUnlock()
+					}
+				}
+			}
+			c <- foo == 42
+		}()
+	}
+	for p := 0; p < procs; p++ {
+		<-c
+	}
+}
+
+func BenchmarkRWMutexWrite100(b *testing.B) {
+	benchmarkRWMutex(b, 0, 100)
+}
+
+func BenchmarkRWMutexWrite10(b *testing.B) {
+	benchmarkRWMutex(b, 0, 10)
+}
+
+func BenchmarkRWMutexWorkWrite100(b *testing.B) {
+	benchmarkRWMutex(b, 100, 100)
+}
+
+func BenchmarkRWMutexWorkWrite10(b *testing.B) {
+	benchmarkRWMutex(b, 100, 10)
+}
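The hunk above extends rwmutex_test.go right after TestRLocker. For reference, RWMutex.RLocker (unchanged by this commit) adapts the read half of the lock to the sync.Locker interface. A minimal usage sketch, not part of the commit; withLock is a made-up helper:

package main

import (
	"fmt"
	"sync"
)

// withLock runs f while holding l; it needs only the sync.Locker interface.
func withLock(l sync.Locker, f func()) {
	l.Lock()
	defer l.Unlock()
	f()
}

func main() {
	var rw sync.RWMutex
	withLock(&rw, func() { fmt.Println("write lock held") })         // exclusive side
	withLock(rw.RLocker(), func() { fmt.Println("read lock held") }) // shared side
}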
