Permalink
Browse files

runtime: improve timers scalability on multi-CPU systems

Use per-P timers, so each P may work with its own timers.

This CL improves performance on multi-CPU systems
in the following cases:

- When serving high number of concurrent connections
  with read/write deadlines set (for instance, highly loaded
  net/http server).

- When using high number of concurrent timers. These timers
  may be implicitly created via context.WithDeadline
  or context.WithTimeout.

Production servers should usually set timeout on connections
and external requests in order to prevent from resource leakage.
See https://blog.cloudflare.com/the-complete-guide-to-golang-net-http-timeouts/

Below are relevant benchmark results for various GOMAXPROCS values
on linux/amd64:

context package:

name                                     old time/op  new time/op  delta
WithTimeout/concurrency=40      4.92µs ± 0%  5.17µs ± 1%  +5.07%  (p=0.000 n=9+9)
WithTimeout/concurrency=4000    6.03µs ± 1%  6.49µs ± 0%  +7.63%  (p=0.000 n=8+10)
WithTimeout/concurrency=400000  8.58µs ± 7%  9.02µs ± 4%  +5.02%  (p=0.019 n=10+10)

name                                     old time/op  new time/op  delta
WithTimeout/concurrency=40-2      3.70µs ± 1%  2.78µs ± 4%  -24.90%  (p=0.000 n=8+9)
WithTimeout/concurrency=4000-2    4.49µs ± 4%  3.67µs ± 5%  -18.26%  (p=0.000 n=10+10)
WithTimeout/concurrency=400000-2  6.16µs ±10%  5.15µs ±13%  -16.30%  (p=0.000 n=10+10)

name                                     old time/op  new time/op  delta
WithTimeout/concurrency=40-4      3.58µs ± 1%  2.64µs ± 2%  -26.13%  (p=0.000 n=9+10)
WithTimeout/concurrency=4000-4    4.17µs ± 0%  3.32µs ± 1%  -20.36%  (p=0.000 n=10+10)
WithTimeout/concurrency=400000-4  5.57µs ± 9%  4.83µs ±10%  -13.27%  (p=0.001 n=10+10)

time package:

name                     old time/op  new time/op  delta
AfterFunc                6.15ms ± 3%  6.07ms ± 2%     ~     (p=0.133 n=10+9)
AfterFunc-2              3.43ms ± 1%  3.56ms ± 1%   +3.91%  (p=0.000 n=10+9)
AfterFunc-4              5.04ms ± 2%  2.36ms ± 0%  -53.20%  (p=0.000 n=10+9)
After                    6.54ms ± 2%  6.49ms ± 3%     ~     (p=0.393 n=10+10)
After-2                  3.68ms ± 1%  3.87ms ± 0%   +5.14%  (p=0.000 n=9+9)
After-4                  6.66ms ± 1%  2.87ms ± 1%  -56.89%  (p=0.000 n=10+10)
Stop                      698µs ± 2%   689µs ± 1%   -1.26%  (p=0.011 n=10+10)
Stop-2                    729µs ± 2%   434µs ± 3%  -40.49%  (p=0.000 n=10+10)
Stop-4                    837µs ± 3%   333µs ± 2%  -60.20%  (p=0.000 n=10+10)
SimultaneousAfterFunc     694µs ± 1%   692µs ± 7%     ~     (p=0.481 n=10+10)
SimultaneousAfterFunc-2   714µs ± 3%   569µs ± 2%  -20.33%  (p=0.000 n=10+10)
SimultaneousAfterFunc-4   782µs ± 2%   386µs ± 2%  -50.67%  (p=0.000 n=10+10)
StartStop                 267µs ± 3%   274µs ± 0%   +2.64%  (p=0.000 n=8+9)
StartStop-2               238µs ± 2%   140µs ± 3%  -40.95%  (p=0.000 n=10+8)
StartStop-4               320µs ± 1%   125µs ± 1%  -61.02%  (p=0.000 n=9+9)
Reset                    75.0µs ± 1%  77.5µs ± 2%   +3.38%  (p=0.000 n=10+10)
Reset-2                   150µs ± 2%    40µs ± 5%  -73.09%  (p=0.000 n=10+9)
Reset-4                   226µs ± 1%    33µs ± 1%  -85.42%  (p=0.000 n=10+10)
Sleep                     857µs ± 6%   878µs ± 9%     ~     (p=0.079 n=10+9)
Sleep-2                   617µs ± 4%   585µs ± 2%   -5.21%  (p=0.000 n=10+10)
Sleep-4                   689µs ± 3%   465µs ± 4%  -32.53%  (p=0.000 n=10+10)
Ticker                   55.9ms ± 2%  55.9ms ± 2%     ~     (p=0.971 n=10+10)
Ticker-2                 28.7ms ± 2%  28.1ms ± 1%   -2.06%  (p=0.000 n=10+10)
Ticker-4                 14.6ms ± 0%  13.6ms ± 1%   -6.80%  (p=0.000 n=9+10)

Fixes #15133

Change-Id: I6f4b09d2db8c5bec93146db6501b44dbfe5c0ac4
Reviewed-on: https://go-review.googlesource.com/34784
Reviewed-by: Austin Clements <austin@google.com>
  • Loading branch information...
valyala authored and ianlancetaylor committed Jan 6, 2017
1 parent 7377d0c commit 76f4fd8a5251b4f63ea14a3c1e2fe2e78eb74f81
@@ -7,10 +7,64 @@ package context_test
import (
. "context"
"fmt"
"runtime"
"sync"
"testing"
"time"
)
func BenchmarkContextCancelTree(b *testing.B) {
func BenchmarkWithTimeout(b *testing.B) {
for concurrency := 40; concurrency <= 4e5; concurrency *= 100 {
name := fmt.Sprintf("concurrency=%d", concurrency)
b.Run(name, func(b *testing.B) {
benchmarkWithTimeout(b, concurrency)
})
}
}
func benchmarkWithTimeout(b *testing.B, concurrentContexts int) {
gomaxprocs := runtime.GOMAXPROCS(0)
perPContexts := concurrentContexts / gomaxprocs
root := Background()
// Generate concurrent contexts.
var wg sync.WaitGroup
ccf := make([][]CancelFunc, gomaxprocs)
for i := range ccf {
wg.Add(1)
go func(i int) {
defer wg.Done()
cf := make([]CancelFunc, perPContexts)
for j := range cf {
_, cf[j] = WithTimeout(root, time.Hour)
}
ccf[i] = cf
}(i)
}
wg.Wait()
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
wcf := make([]CancelFunc, 10)
for pb.Next() {
for i := range wcf {
_, wcf[i] = WithTimeout(root, time.Hour)
}
for _, f := range wcf {
f()
}
}
})
b.StopTimer()
for _, cf := range ccf {
for _, f := range cf {
f()
}
}
}
func BenchmarkCancelTree(b *testing.B) {
depths := []int{1, 10, 100, 1000}
for _, d := range depths {
b.Run(fmt.Sprintf("depth=%d", d), func(b *testing.B) {
@@ -270,8 +270,9 @@ func parseHeader(buf []byte) (int, error) {
// It does analyze and verify per-event-type arguments.
func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (events []*Event, stacks map[uint64][]*Frame, err error) {
var ticksPerSec, lastSeq, lastTs int64
var lastG, timerGoid uint64
var lastG uint64
var lastP int
timerGoids := make(map[uint64]bool)
lastGs := make(map[int]uint64) // last goroutine running on P
stacks = make(map[uint64][]*Frame)
batches := make(map[int][]*Event) // events by P
@@ -308,7 +309,7 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
return
}
case EvTimerGoroutine:
timerGoid = raw.args[0]
timerGoids[raw.args[0]] = true
case EvStack:
if len(raw.args) < 2 {
err = fmt.Errorf("EvStack has wrong number of arguments at offset 0x%x: want at least 2, got %v",
@@ -431,7 +432,7 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
for _, ev := range events {
ev.Ts = int64(float64(ev.Ts-minTs) * freq)
// Move timers and syscalls to separate fake Ps.
if timerGoid != 0 && ev.G == timerGoid && ev.Type == EvGoUnblock {
if timerGoids[ev.G] && ev.Type == EvGoUnblock {
ev.P = TimerP
}
if ev.Type == EvGoSysExit {
View
@@ -3863,15 +3863,11 @@ func sysmon() {
}
shouldRelax := true
if osRelaxMinNS > 0 {
lock(&timers.lock)
if timers.sleeping {
now := nanotime()
next := timers.sleepUntil
if next-now < osRelaxMinNS {
shouldRelax = false
}
next := timeSleepUntil()
now := nanotime()
if next-now < osRelaxMinNS {
shouldRelax = false
}
unlock(&timers.lock)
}
if shouldRelax {
osRelax(true)
Oops, something went wrong.

0 comments on commit 76f4fd8

Please sign in to comment.