-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Description
Go version
go1.26-dev linux/amd64
Output of go env in your module/workspace:
N/AWhat did you do?
The docs state:
/sched/goroutines/not-in-go:goroutines
Approximate count of goroutines running or blocked in
a system call or cgo call. Not guaranteed to add up to
/sched/goroutines:goroutines with other goroutine metrics.
In production we've observed that there is a (static looking, non-trivial amount of) not-in-go goroutines that hang around.
One theory is that this is a not-in-go goroutine for every extraM that hasn't exited yet. As far as I understand, this would be an (unintended) side-effect of #51676. The M (and a locked G) are kept alive after a call from C to Go to speed up future invocations. That's fine, but outside of cgo calls, this does not represent a "live" goroutine, it (probably) shouldn't be counted as a goroutine that's not-in-go.
(Another side-effect of this caching is that pprof labels aren't cleared and could erroneously be "reused" across calls, I'll file a separate issue for that.)
I tried to reproduce this locally:
// goma.go
package main
import (
"C"
)
import (
"fmt"
"os"
"runtime/metrics"
"time"
)
//export PrintStats
func PrintStats() {
m := []metrics.Sample{
{Name: "/sched/gomaxprocs:threads"},
{Name: "/sched/goroutines/not-in-go:goroutines"},
{Name: "/sched/goroutines/runnable:goroutines"},
{Name: "/sched/goroutines/running:goroutines"},
{Name: "/sched/goroutines/waiting:goroutines"},
{Name: "/sched/goroutines:goroutines"},
{Name: "/sched/threads/total:threads"},
}
metrics.Read(m)
fmt.Fprintf(os.Stderr, "GOMAXPROCS=%d | not-in-go: %d | runnable: %d | running: %d | waiting: %d | total: %d | go-runtime-threads: %d\n",
m[0].Value.Uint64(), // GOMAXPROCS
m[1].Value.Uint64(), // not-in-go
m[2].Value.Uint64(), // runnable
m[3].Value.Uint64(), // running
m[4].Value.Uint64(), // waiting
m[5].Value.Uint64(), // goroutines
m[6].Value.Uint64(), // threads
)
}
//export Sleep
func Sleep(seconds uint64) {
time.Sleep(time.Duration(seconds) * time.Second)
}
func main() {}// c-to-go.c
#define _GNU_SOURCE
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/* To produce, run:
*
* GO111MODULE=off go build --buildmode=c-archive
*
* NOTE: Go doesn't seem to like the pre-existing C file, and tries to compile
* it, which fails. So "temporarily" remove it.
*
* rm *.h *.a c-to-go ; mv c-to-go.c c-to-go.c.tmp
* GO111MODULE=off gotip build --buildmode=c-archive
* mv c-to-go.c.tmp c-to-go.c
* gcc -pthread c-to-go.c c-to-go.a -o c-to-go
* ./c-to-go
*/
#include "c-to-go.h"
static void* threadSleep(void* arg) {
fprintf(stderr, "%d: starting to Go sleep\n", gettid());
Sleep(3);
fprintf(stderr, "%d: ending Go sleep, starting C sleep\n", gettid());
sleep(3);
fprintf(stderr, "%d: ending C sleep\n", gettid());
return NULL;
}
static void* threadPrintStats(void* arg) {
for (int i = 0; i < 10; ++i) {
fprintf(stderr, "%d: ", gettid());
PrintStats();
usleep(1000 * 500);
}
return NULL;
}
#define handle_error_en(en, msg) \
do { \
errno = en; \
perror(msg); \
exit(EXIT_FAILURE); \
} while (0)
static pthread_t rspawn(const char* name, void* (*start_routine)(void*)) {
pthread_t tid = -1; /* ID returned by pthread_create() */
int err = pthread_create(&tid, NULL /*attr*/, start_routine, NULL);
if (err != 0) {
handle_error_en(err, "pthread_create");
}
printf("spawned %s: (pthread_t) %lu\n", name, tid);
return tid;
}
static void rjoin(pthread_t tid) {
int err = pthread_join(tid, NULL);
if (err != 0) {
handle_error_en(err, "pthread_join");
}
printf("joined (pthread_t) %lu\n", tid);
}
int main() {
PrintStats();
PrintStats();
pthread_t notInGoPrinter = rspawn(
"PRINTSTATS", threadPrintStats); /* periodically print some stats */
pthread_t threads[] = {
rspawn("SLEEP", threadSleep), rspawn("SLEEP", threadSleep),
rspawn("SLEEP", threadSleep), rspawn("SLEEP", threadSleep),
rspawn("SLEEP", threadSleep), rspawn("SLEEP", threadSleep),
};
for (int i = 0; i < sizeof(threads) / sizeof(threads[0]); i++) {
rjoin(threads[i]);
}
// fprintf(stderr, "MAIN: not-in-go: %llu\n", NotInGo());
PrintStats();
rjoin(notInGoPrinter);
PrintStats();
PrintStats();
sleep(2);
PrintStats();
return 0;
}Gives:
rm *.h *.a c-to-go ; mv c-to-go.c c-to-go.c.tmp ; GO111MODULE=off gotip build --buildmode=c-archive ; mv c-to-go.c.tmp c-to-go.c ; gcc -pthread c-to-go.c c-to-go.a -o c-to-go && ./c-to-go
GOMAXPROCS=64 | not-in-go: 0 | runnable: 0 | running: 1 | waiting: 5 | total: 6 | go-runtime-threads: 6
GOMAXPROCS=64 | not-in-go: 0 | runnable: 0 | running: 1 | waiting: 5 | total: 6 | go-runtime-threads: 6
spawned PRINTSTATS: (pthread_t) 140604453406400
1256849: spawned SLEEP: (pthread_t) 140604445013696
1256850: starting to Go sleep
spawned SLEEP: (pthread_t) 140604436620992
GOMAXPROCS=64 | not-in-go: 1 | runnable: 0 | running: 1 | waiting: 5 | total: 7 | go-runtime-threads: 6
1256851: starting to Go sleep
spawned SLEEP: (pthread_t) 140604428228288
1256852: starting to Go sleep
spawned SLEEP: (pthread_t) 140604335974080
1256853: starting to Go sleep
spawned SLEEP: (pthread_t) 140604327581376
1256854: starting to Go sleep
spawned SLEEP: (pthread_t) 140604319188672
1256855: starting to Go sleep
1256849: GOMAXPROCS=64 | not-in-go: 1 | runnable: 0 | running: 1 | waiting: 11 | total: 13 | go-runtime-threads: 6
1256849: GOMAXPROCS=64 | not-in-go: 1 | runnable: 0 | running: 1 | waiting: 11 | total: 13 | go-runtime-threads: 6
1256849: GOMAXPROCS=64 | not-in-go: 1 | runnable: 0 | running: 1 | waiting: 11 | total: 13 | go-runtime-threads: 6
1256849: GOMAXPROCS=64 | not-in-go: 1 | runnable: 0 | running: 1 | waiting: 11 | total: 13 | go-runtime-threads: 6
1256849: GOMAXPROCS=64 | not-in-go: 1 | runnable: 0 | running: 1 | waiting: 11 | total: 13 | go-runtime-threads: 6
1256849: GOMAXPROCS=64 | not-in-go: 1 | runnable: 0 | running: 1 | waiting: 11 | total: 13 | go-runtime-threads: 6
1256850: ending Go sleep, starting C sleep
1256854: ending Go sleep, starting C sleep
1256855: ending Go sleep, starting C sleep
1256852: ending Go sleep, starting C sleep
1256851: ending Go sleep, starting C sleep
1256853: ending Go sleep, starting C sleep
1256849: GOMAXPROCS=64 | not-in-go: 7 | runnable: 0 | running: 1 | waiting: 5 | total: 13 | go-runtime-threads: 6
1256849: GOMAXPROCS=64 | not-in-go: 7 | runnable: 0 | running: 1 | waiting: 5 | total: 13 | go-runtime-threads: 6
1256849: GOMAXPROCS=64 | not-in-go: 7 | runnable: 0 | running: 1 | waiting: 5 | total: 13 | go-runtime-threads: 6
1256850: ending C sleep
1256854: ending C sleep
1256855: ending C sleep
joined (pthread_t) 140604445013696
1256852: ending C sleep
1256853: ending C sleep
1256851: ending C sleep
joined (pthread_t) 140604436620992
joined (pthread_t) 140604428228288
joined (pthread_t) 140604335974080
joined (pthread_t) 140604327581376
joined (pthread_t) 140604319188672
GOMAXPROCS=64 | not-in-go: 0 | runnable: 0 | running: 1 | waiting: 12 | total: 13 | go-runtime-threads: 6
joined (pthread_t) 140604453406400
GOMAXPROCS=64 | not-in-go: 0 | runnable: 0 | running: 1 | waiting: 12 | total: 13 | go-runtime-threads: 6
GOMAXPROCS=64 | not-in-go: 0 | runnable: 0 | running: 1 | waiting: 12 | total: 13 | go-runtime-threads: 6
GOMAXPROCS=64 | not-in-go: 0 | runnable: 0 | running: 1 | waiting: 12 | total: 13 | go-runtime-threads: 6For reference, the calculating code:
Lines 814 to 859 in feae743
| for _, p := range allp { | |
| if p == nil || p.status == _Pdead { | |
| break | |
| } | |
| a.gCreated += p.goroutinesCreated | |
| switch p.status { | |
| case _Prunning: | |
| if thread, ok := setBlockOnExitSyscall(p); ok { | |
| thread.resume() | |
| a.gNonGo++ | |
| } else { | |
| a.gRunning++ | |
| } | |
| case _Pgcstop: | |
| // The world is stopping or stopped. | |
| // This is fine. The results will be | |
| // slightly odd since nothing else | |
| // is running, but it will be accurate. | |
| } | |
| for { | |
| h := atomic.Load(&p.runqhead) | |
| t := atomic.Load(&p.runqtail) | |
| next := atomic.Loaduintptr((*uintptr)(&p.runnext)) | |
| runnable := int32(t - h) | |
| if atomic.Load(&p.runqhead) != h || runnable < 0 { | |
| continue | |
| } | |
| if next != 0 { | |
| runnable++ | |
| } | |
| a.gRunnable += uint64(runnable) | |
| break | |
| } | |
| } | |
| // Global run queue. | |
| a.gRunnable += uint64(sched.runq.size) | |
| // Account for Gs that are in _Gsyscall without a P. | |
| nGsyscallNoP := sched.nGsyscallNoP.Load() | |
| // nGsyscallNoP can go negative during temporary races. | |
| if nGsyscallNoP >= 0 { | |
| a.gNonGo += uint64(nGsyscallNoP) | |
| } |
What did you see happen?
In production, we see many more not-in-go goroutines than we expect. Those goroutines (we believe) appear as "stackless" in goroutine profiles too (having no stack frames).
What did you expect to see?
While I'd expect these cached threads to show up in /sched/threads/total:threads, I did not expect to see them in not-in-go.
In the example above, we see that during the C sleep phase, the number of not-in-go "goroutines" goes up, despite the state of these threads being pure C.
Metadata
Metadata
Assignees
Labels
Type
Projects
Status