-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Description
Go version
go version go1.24.9 darwin/arm64
Output of go env in your module/workspace:
AR='ar'
CC='clang'
CGO_CFLAGS='-O2 -g'
CGO_CPPFLAGS=''
CGO_CXXFLAGS='-O2 -g'
CGO_ENABLED='1'
CGO_FFLAGS='-O2 -g'
CGO_LDFLAGS='-O2 -g'
CXX='clang++'
GCCGO='gccgo'
GO111MODULE=''
GOARCH='arm64'
GOARM64='v8.0'
GOAUTH='netrc'
GOBIN=''
GOCACHE='/Users/joshua.kim/Library/Caches/go-build'
GOCACHEPROG=''
GODEBUG=''
GOENV='/Users/joshua.kim/Library/Application Support/go/env'
GOEXE=''
GOEXPERIMENT=''
GOFIPS140='off'
GOFLAGS=''
GOGCCFLAGS='-fPIC -arch arm64 -pthread -fno-caret-diagnostics -Qunused-arguments -fmessage-length=0 -ffile-prefix-map=/var/folders/df/s96cgq690vl3yl2blhrsy1rm0000gn/T/go-build1773876167=/tmp/go-build -gno-record-gcc-switches -fno-common'
GOHOSTARCH='arm64'
GOHOSTOS='darwin'
GOINSECURE=''
GOMOD='/Users/joshua.kim/workspace/avalanchego/go.mod'
GOMODCACHE='/Users/joshua.kim/golang/pkg/mod'
GONOPROXY='github.com/ava-labs/coreth-internal'
GONOSUMDB='github.com/ava-labs/coreth-internal'
GOOS='darwin'
GOPATH='/Users/joshua.kim/golang'
GOPRIVATE='github.com/ava-labs/coreth-internal'
GOPROXY='https://proxy.golang.org,direct'
GOROOT='/Users/joshua.kim/golang/pkg/mod/golang.org/toolchain@v0.0.1-go1.24.9.darwin-arm64'
GOSUMDB='sum.golang.org'
GOTELEMETRY='local'
GOTELEMETRYDIR='/Users/joshua.kim/Library/Application Support/go/telemetry'
GOTMPDIR=''
GOTOOLCHAIN='auto'
GOTOOLDIR='/Users/joshua.kim/golang/pkg/mod/golang.org/toolchain@v0.0.1-go1.24.9.darwin-arm64/pkg/tool/darwin_arm64'
GOVCS=''
GOVERSION='go1.24.9'
GOWORK='/Users/joshua.kim/workspace/go.work'
PKG_CONFIG='pkg-config'

What did you do?
@michaelkaplan13 ran a repro of some behavior we're seeing in prod where we see our clients sending RST_STREAM even after receiving END_STREAM
package main
import (
"context"
"fmt"
"log"
"os"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/ava-labs/avalanchego/utils/rpc"
"github.com/ava-labs/avalanchego/vms/proposervm"
)
// main runs a fixed-size load test against the proposervm endpoint:
// numThreads goroutines each issue reqsPerThread sequential requests,
// then aggregate error counts are printed.
func main() {
	const (
		numThreads    = 100
		reqsPerThread = 100
		targetURL     = "https://api.avax-test.network/ext/bc/C/proposervm"
	)

	// An optional auth token may be supplied as the first CLI argument.
	var token string
	if len(os.Args) > 1 {
		token = os.Args[1]
		fmt.Printf("Using token: %s\n", token)
	} else {
		fmt.Printf("No token provided\n")
	}

	fmt.Printf("Starting load test with %d threads, %d requests per thread\n", numThreads, reqsPerThread)
	fmt.Printf("Target URL: %s\n", targetURL)
	fmt.Printf("Total requests: %d\n", numThreads*reqsPerThread)

	var (
		totalErrors  int64 // incremented atomically by worker goroutines
		goawayErrors int64 // subset of totalErrors mentioning GOAWAY
		wg           sync.WaitGroup
	)

	startTime := time.Now()
	for threadID := 0; threadID < numThreads; threadID++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			runThread(id, targetURL, token, reqsPerThread, &totalErrors, &goawayErrors)
		}(threadID)
	}
	wg.Wait()

	elapsed := time.Since(startTime)
	total := numThreads * reqsPerThread
	fmt.Printf("Completed %d requests in %v (%.2f req/sec)\n",
		total, elapsed, float64(total)/elapsed.Seconds())
	fmt.Printf("Total errors: %d\n", atomic.LoadInt64(&totalErrors))
	fmt.Printf("GOAWAY errors: %d\n", atomic.LoadInt64(&goawayErrors))
}
// runThread issues reqsPerThread sequential getCurrentEpoch calls against
// targetURL on its own client, atomically incrementing totalErrors for every
// failure and goawayErrors for failures whose message mentions GOAWAY.
func runThread(threadID int, targetURL string, token string, reqsPerThread int, totalErrors *int64, goawayErrors *int64) {
	// Each worker gets its own client so connection behavior is per-thread.
	client := &proposervm.JSONRPCClient{
		Requester: rpc.NewEndpointRequester(targetURL),
	}
	ctx := context.Background()

	for req := 0; req < reqsPerThread; req++ {
		var err error
		// Only attach the token query parameter when one was provided.
		if token == "" {
			_, err = client.GetCurrentEpoch(ctx)
		} else {
			_, err = client.GetCurrentEpoch(ctx, rpc.WithQueryParam("token", token))
		}
		if err == nil {
			continue
		}
		atomic.AddInt64(totalErrors, 1)
		// Case-insensitive match so any GOAWAY-related failure is counted.
		if strings.Contains(strings.ToUpper(err.Error()), "GOAWAY") {
			atomic.AddInt64(goawayErrors, 1)
		}
		log.Printf("thread %d request %d errored: %v", threadID, req, err)
	}
}
What did you see happen?
This seems to be the same behavior that Cloudflare mentioned in one of their engineering blog posts (ref).
Filtering out the frame data for a conn + stream id pair in our logs:
2025/11/10 12:40:44 http2: Framer 0x14000838460: wrote HEADERS flags=END_HEADERS stream=83 len=8
2025/11/10 12:40:44 http2: Framer 0x14000838460: wrote DATA flags=END_STREAM stream=83 len=92 data="{\"jsonrpc\":\"2.0\",\"method\":\"proposervm.getCurrentEpoch\",\"params\":{},\"id\":4399340223224405049}"
2025/11/10 12:40:45 http2: Framer 0x14000838460: read HEADERS flags=END_HEADERS stream=83 len=127
2025/11/10 12:40:45 http2: Framer 0x14000838460: read DATA stream=83 len=131 data="\x1f\x8b\b\x00\x00\x00\x00\x00\x00\x03\x14\xcc\xc1\r\xc20\f\x05\xd0]\xfe9B\xc6v\x13\xe2+\x17\x06`\x81R\"\x1aDC\x95\xa4\xa7\xaa\xbb#\xde\x00oǻ}K]'\x18\xf8Dp\xa8\xa9m\x9f\x0e\xdbQ\xb6\xe5\x91*\f\xa2\xe1\x02\x87\xd6\xc7\xda\xefyI0\x9c\x83\xe7\x10\xbdx\x85\xc3z\x9d\xc7\\n)\xbf\xe6\xfe\x7fT\x83g\x1c\x0e\xf9\tS\x89Q\x94\x98\x85Y\x95\x06\":~\x00\x00\x00\xff\xff\x03\x00RnhAv\x00\x00\x00"
2025/11/10 12:40:45 http2: Framer 0x14000838460: read DATA flags=END_STREAM stream=83 len=0 data=""
2025/11/10 12:40:45 http2: Framer 0x14000838460: wrote RST_STREAM stream=83 len=4 ErrCode=CANCEL
Going through the frame data, our client finishes sending our request, so we write END_STREAM to indicate we’re done with our side of the stream:
2025/11/10 12:40:44 http2: Framer 0x14000838460: wrote DATA flags=END_STREAM stream=83 len=92 data="{\"jsonrpc\":\"2.0\",\"method\":\"proposervm.getCurrentEpoch\",\"params\":{},\"id\":4399340223224405049}"
Client sees that the server wrote END_STREAM to close its end of the stream
2025/11/10 12:40:45 http2: Framer 0x14000838460: read DATA flags=END_STREAM stream=83 len=0 data=""
Client writes RST_STREAM on the stream to try to time out:
2025/11/10 12:40:45 http2: Framer 0x14000838460: wrote RST_STREAM stream=83 len=4 ErrCode=CANCEL
Referencing the h2 spec (ref), this frame isn’t actually needed because the stream state is already closed.
Endpoints MUST ignore
WINDOW_UPDATE or RST_STREAM frames received in this state, though
endpoints MAY choose to treat frames that arrive a significant
time after sending END_STREAM as a connection error
(Section 5.4.1) of type PROTOCOL_ERROR.
So it does look like we are seeing the same behavior CF saw, where RST_STREAM is being sent unnecessarily after a client receives END_STREAM. I don't think that sending a RST_STREAM after the stream is in a closed state is itself detrimental — but the problem is that a PING frame is sent with the RST_STREAM to let the client distinguish between a slow and an unresponsive server (ref), which causes us to get ping rate limited and triggers a connection close.
What did you expect to see?
RST_STREAM should not be sent after END_STREAM is received.