New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
adding timeout to remote cluster allocate call and adding total timeout to allocate #1815
Changes from 1 commit
da989a1
cd28618
7b92a82
c25c5cf
078048e
8b09614
1a8120b
7561631
43af4ca
9cdbc50
83e3c9b
77921e2
1e74784
42d5ded
20446fa
8355984
113ad97
a5fbe84
60db17f
3e9e150
c948064
4f36c33
7e4b9f8
e35092a
9f466db
4a910fd
b3e2817
e16e75f
8bdf18c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -82,6 +82,13 @@ const ( | |
batchWaitTime = 500 * time.Millisecond | ||
) | ||
|
||
const ( | ||
// Timeout for an individual Allocate RPC call can take | ||
remoteAllocateTimeout = 10 * time.Second | ||
// Total timeout for allocation including retries | ||
totalRemoteAllocationTimeout = 30 * time.Second | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The timeout is set, but not used? Fail with DEADLINE_EXCEEDED when the request does not complete by 30s. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for catching this. Added. |
||
) | ||
|
||
var allocationRetry = wait.Backoff{ | ||
Steps: 5, | ||
Duration: 10 * time.Millisecond, | ||
|
@@ -106,7 +113,7 @@ type Allocator struct { | |
pendingRequests chan request | ||
readyGameServerCache *ReadyGameServerCache | ||
topNGameServerCount int | ||
remoteAllocationCallback func(string, grpc.DialOption, *pb.AllocationRequest) (*pb.AllocationResponse, error) | ||
remoteAllocationCallback func(context.Context, string, grpc.DialOption, *pb.AllocationRequest) (*pb.AllocationResponse, error) | ||
} | ||
|
||
// request is an async request for allocation | ||
|
@@ -133,15 +140,17 @@ func NewAllocator(policyInformer multiclusterinformerv1.GameServerAllocationPoli | |
secretSynced: secretInformer.Informer().HasSynced, | ||
readyGameServerCache: readyGameServerCache, | ||
topNGameServerCount: topNGameServerDefaultCount, | ||
remoteAllocationCallback: func(endpoint string, dialOpts grpc.DialOption, request *pb.AllocationRequest) (*pb.AllocationResponse, error) { | ||
remoteAllocationCallback: func(ctx context.Context, endpoint string, dialOpts grpc.DialOption, request *pb.AllocationRequest) (*pb.AllocationResponse, error) { | ||
conn, err := grpc.Dial(endpoint, dialOpts) | ||
if err != nil { | ||
return nil, err | ||
} | ||
defer conn.Close() // nolint: errcheck | ||
|
||
allocationCtx, cancel := context.WithTimeout(ctx, remoteAllocateTimeout) | ||
defer cancel() // nolint: errcheck | ||
grpcClient := pb.NewAllocationServiceClient(conn) | ||
return grpcClient.Allocate(context.Background(), request) | ||
return grpcClient.Allocate(allocationCtx, request) | ||
}, | ||
} | ||
|
||
|
@@ -336,13 +345,15 @@ func (c *Allocator) allocateFromRemoteCluster(gsa *allocationv1.GameServerAlloca | |
request.MultiClusterSetting.Enabled = false | ||
request.Namespace = connectionInfo.Namespace | ||
|
||
ctx, cancel := context.WithTimeout(context.Background(), totalRemoteAllocationTimeout) | ||
defer cancel() // nolint: errcheck | ||
// Retry on remote call failures. | ||
err = Retry(remoteAllocationRetry, func() error { | ||
for i, ip := range connectionInfo.AllocationEndpoints { | ||
endpoint := addPort(ip) | ||
c.loggerForGameServerAllocationKey("remote-allocation").WithField("request", request).WithField("endpoint", endpoint).Debug("forwarding allocation request") | ||
|
||
allocationResponse, err = c.remoteAllocationCallback(endpoint, dialOpts, request) | ||
allocationResponse, err = c.remoteAllocationCallback(ctx, endpoint, dialOpts, request) | ||
if err != nil { | ||
c.baseLogger.Errorf("remote allocation failed with: %v", err) | ||
// If there are multiple enpoints for the allocator connection and the current one is | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also the client timeout needs to be set on the request context: https://grpc.io/blog/deadlines/#go
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I am setting it
agones/pkg/gameserverallocations/allocator.go
Line 153 in cd28618
Unless you mean something else?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks.