-
Notifications
You must be signed in to change notification settings - Fork 25
fix: Clean up usage retry logic #1950
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
13 commits
Select commit
Hold shift + click to select a range
3d5f451
fix: Clean up usage retry logic
disq 2de8ca8
More clean up: Utilize built-in go-retryablehttp in apiClient for api…
disq d20a7f3
Remove redundant `Retry-After` handling (as AWS probably doesn't resp…
disq 12982ca
CR: use `max`, doh
disq fd71b5e
"over charge" -> "overcharge"
disq c3b9b56
Add mock test for duplicate marketplace requests
disq 877a1ef
Use separate timeFunc for BatchUpdater
disq 47ca61d
Simplify more
disq 25c1593
CR: Log error
disq 66433a6
Merge branch 'main' into fix/usage-retry-clean-up
marianogappa d40ec9a
Merge branch 'main' into fix/usage-retry-clean-up
disq bb8b56e
Extract dry run mock to helper
disq fde2bd9
Use timeFunc instead of time.Now
disq File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,8 +21,8 @@ import ( | |
| "github.com/cloudquery/cloudquery-api-go/config" | ||
| "github.com/cloudquery/plugin-sdk/v4/plugin" | ||
| "github.com/google/uuid" | ||
| "github.com/hashicorp/go-retryablehttp" | ||
| "github.com/rs/zerolog" | ||
| "github.com/rs/zerolog/log" | ||
| ) | ||
|
|
||
| const ( | ||
|
|
@@ -32,6 +32,9 @@ const ( | |
| defaultMaxWaitTime = 60 * time.Second | ||
| defaultMinTimeBetweenFlushes = 10 * time.Second | ||
| defaultMaxTimeBetweenFlushes = 30 * time.Second | ||
|
|
||
| marketplaceDuplicateWaitTime = 1 * time.Second | ||
| marketplaceMinRetries = 20 | ||
| ) | ||
|
|
||
| const ( | ||
|
|
@@ -109,7 +112,9 @@ func WithMinTimeBetweenFlushes(minTimeBetweenFlushes time.Duration) UsageClientO | |
| // WithMaxRetries sets the maximum number of retries to update the usage in case of an API error | ||
| func WithMaxRetries(maxRetries int) UsageClientOptions { | ||
| return func(updater *BatchUpdater) { | ||
| updater.maxRetries = maxRetries | ||
| if maxRetries > 0 { | ||
| updater.maxRetries = maxRetries | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -198,6 +203,9 @@ type BatchUpdater struct { | |
| isClosed bool | ||
| dataOnClose bool | ||
| usageIncreaseMethod int | ||
|
|
||
| // Testing | ||
| timeFunc func() time.Time | ||
| } | ||
|
|
||
| func NewUsageClient(meta plugin.Meta, ops ...UsageClientOptions) (UsageClient, error) { | ||
|
|
@@ -216,6 +224,7 @@ func NewUsageClient(meta plugin.Meta, ops ...UsageClientOptions) (UsageClient, e | |
| triggerUpdate: make(chan struct{}), | ||
| done: make(chan struct{}), | ||
| closeError: make(chan error), | ||
| timeFunc: time.Now, | ||
|
|
||
| tables: map[string]uint32{}, | ||
| } | ||
|
|
@@ -246,18 +255,26 @@ func NewUsageClient(meta plugin.Meta, ops ...UsageClientOptions) (UsageClient, e | |
|
|
||
| // Create a default api client if none was provided | ||
| if u.apiClient == nil { | ||
| ac, err := cqapi.NewClientWithResponses(u.url, cqapi.WithRequestEditorFn(func(_ context.Context, req *http.Request) error { | ||
| token, err := u.tokenClient.GetToken() | ||
| if err != nil { | ||
| return fmt.Errorf("failed to get token: %w", err) | ||
| } | ||
| req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token)) | ||
| return nil | ||
| })) | ||
| retryClient := retryablehttp.NewClient() | ||
| retryClient.Logger = nil | ||
| retryClient.RetryMax = u.maxRetries | ||
| retryClient.RetryWaitMax = u.maxWaitTime | ||
|
|
||
| var err error | ||
| u.apiClient, err = cqapi.NewClientWithResponses(u.url, | ||
| cqapi.WithRequestEditorFn(func(_ context.Context, req *http.Request) error { | ||
| token, err := u.tokenClient.GetToken() | ||
| if err != nil { | ||
| return fmt.Errorf("failed to get token: %w", err) | ||
| } | ||
| req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token)) | ||
| return nil | ||
| }), | ||
| cqapi.WithHTTPClient(retryClient.StandardClient()), | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is what the default |
||
| ) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("failed to create api client: %w", err) | ||
| } | ||
| u.apiClient = ac | ||
| } | ||
|
|
||
| // Set team name from configuration if not provided | ||
|
|
@@ -289,11 +306,12 @@ func (u *BatchUpdater) setupAWSMarketplace() error { | |
| u.batchLimit = 1000000000 | ||
|
|
||
| u.minTimeBetweenFlushes = 1 * time.Minute | ||
| u.maxRetries = max(u.maxRetries, marketplaceMinRetries) | ||
| u.backgroundUpdater() | ||
|
|
||
| _, err = u.awsMarketplaceClient.MeterUsage(ctx, &marketplacemetering.MeterUsageInput{ | ||
| ProductCode: aws.String(awsMarketplaceProductCode()), | ||
| Timestamp: aws.Time(time.Now()), | ||
| Timestamp: aws.Time(u.timeFunc()), | ||
| UsageDimension: aws.String("rows"), | ||
| UsageQuantity: aws.Int32(int32(0)), | ||
| DryRun: aws.Bool(true), | ||
|
|
@@ -486,13 +504,13 @@ func (u *BatchUpdater) backgroundUpdater() { | |
| } | ||
| // If we are using AWS Marketplace, we need to round down to the nearest 1000 | ||
| // Only on the last update, will we round up to the nearest 1000 | ||
| // This will allow us to not over charge the customer by rounding on each batch | ||
| // This will allow us to not overcharge the customer by rounding on each batch | ||
| if u.awsMarketplaceClient != nil { | ||
| totals = roundDown(totals, 1000) | ||
| } | ||
|
|
||
| if err := u.updateUsageWithRetryAndBackoff(ctx, totals, tables); err != nil { | ||
| log.Warn().Err(err).Msg("failed to update usage") | ||
| u.logger.Warn().Err(err).Msg("failed to update usage") | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cleaning up some "untethered" logger usage. |
||
| continue | ||
| } | ||
| u.subtractTableUsage(tables, totals) | ||
|
|
@@ -510,12 +528,12 @@ func (u *BatchUpdater) backgroundUpdater() { | |
| } | ||
| // If we are using AWS Marketplace, we need to round down to the nearest 1000 | ||
| // Only on the last update, will we round up to the nearest 1000 | ||
| // This will allow us to not over charge the customer by rounding on each batch | ||
| // This will allow us to not overcharge the customer by rounding on each batch | ||
| if u.awsMarketplaceClient != nil { | ||
| totals = roundDown(totals, 1000) | ||
| } | ||
| if err := u.updateUsageWithRetryAndBackoff(ctx, totals, tables); err != nil { | ||
| log.Warn().Err(err).Msg("failed to update usage") | ||
| u.logger.Warn().Err(err).Msg("failed to update usage") | ||
| continue | ||
| } | ||
| u.subtractTableUsage(tables, totals) | ||
|
|
@@ -573,60 +591,70 @@ func (u *BatchUpdater) reportUsageToAWSMarketplace(ctx context.Context, rows uin | |
| // Each product is given a unique product code when it is listed in AWS Marketplace | ||
| // in the future we can have multiple product codes for container or AMI based listings | ||
| ProductCode: aws.String(awsMarketplaceProductCode()), | ||
| Timestamp: aws.Time(time.Now()), | ||
| Timestamp: aws.Time(u.timeFunc()), | ||
| UsageDimension: aws.String("rows"), | ||
| UsageAllocations: usage, | ||
| UsageQuantity: aws.Int32(int32(rows)), | ||
| }) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to update usage with : %w", err) | ||
| return fmt.Errorf("failed to update usage: %w", err) | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| func (u *BatchUpdater) updateUsageWithRetryAndBackoff(ctx context.Context, rows uint32, tables []cqapi.UsageIncreaseTablesInner) error { | ||
| func (u *BatchUpdater) updateMarketplaceUsage(ctx context.Context, rows uint32) error { | ||
| var lastErr error | ||
| for retry := 0; retry < u.maxRetries; retry++ { | ||
| u.logger.Debug().Str("url", u.url).Int("try", retry).Int("max_retries", u.maxRetries).Uint32("rows", rows).Msg("updating usage") | ||
| queryStartTime := time.Now() | ||
| u.logger.Debug().Int("try", retry).Int("max_retries", u.maxRetries).Uint32("rows", rows).Msg("updating usage") | ||
bbernays marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // If the AWS Marketplace client is set, use it to track usage | ||
| if u.awsMarketplaceClient != nil { | ||
| return u.reportUsageToAWSMarketplace(ctx, rows) | ||
| } | ||
| payload := cqapi.IncreaseTeamPluginUsageJSONRequestBody{ | ||
| RequestId: uuid.New(), | ||
| PluginTeam: u.pluginMeta.Team, | ||
| PluginKind: u.pluginMeta.Kind, | ||
| PluginName: u.pluginMeta.Name, | ||
| Rows: int(rows), | ||
| lastErr = u.reportUsageToAWSMarketplace(ctx, rows) | ||
| if lastErr == nil { | ||
| u.logger.Debug().Int("try", retry).Uint32("rows", rows).Msg("usage updated") | ||
| return nil | ||
| } | ||
|
|
||
| if len(tables) > 0 { | ||
| payload.Tables = &tables | ||
| var de *types.DuplicateRequestException | ||
| if !errors.As(lastErr, &de) { | ||
| return fmt.Errorf("failed to update usage: %w", lastErr) | ||
| } | ||
| u.logger.Debug().Err(lastErr).Int("try", retry).Uint32("rows", rows).Msg("usage update failed due to duplicate request") | ||
|
|
||
| resp, err := u.apiClient.IncreaseTeamPluginUsageWithResponse(ctx, u.teamName, payload) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to update usage: %w", err) | ||
| } | ||
| if resp.StatusCode() >= 200 && resp.StatusCode() < 300 { | ||
| u.logger.Debug().Str("url", u.url).Int("try", retry).Int("status_code", resp.StatusCode()).Uint32("rows", rows).Msg("usage updated") | ||
| u.lastUpdateTime = time.Now().UTC() | ||
| if resp.HTTPResponse != nil { | ||
| u.updateConfigurationFromHeaders(resp.HTTPResponse.Header) | ||
| } | ||
| return nil | ||
| } | ||
| jitter := time.Duration(rand.Intn(1000)) * time.Millisecond | ||
disq marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| time.Sleep(marketplaceDuplicateWaitTime + jitter) | ||
| } | ||
| return fmt.Errorf("failed to update usage: max retries exceeded: %w", lastErr) | ||
| } | ||
|
|
||
| retryDuration, err := u.calculateRetryDuration(resp.StatusCode(), resp.HTTPResponse.Header, queryStartTime, retry) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to calculate retry duration: %w", err) | ||
| } | ||
| if retryDuration > 0 { | ||
| time.Sleep(retryDuration) | ||
| func (u *BatchUpdater) updateUsageWithRetryAndBackoff(ctx context.Context, rows uint32, tables []cqapi.UsageIncreaseTablesInner) error { | ||
| // If the AWS Marketplace client is set, use it to track usage | ||
| if u.awsMarketplaceClient != nil { | ||
| return u.updateMarketplaceUsage(ctx, rows) | ||
| } | ||
|
|
||
| u.logger.Debug().Str("url", u.url).Uint32("rows", rows).Msg("updating usage") | ||
| payload := cqapi.IncreaseTeamPluginUsageJSONRequestBody{ | ||
| RequestId: uuid.New(), | ||
| PluginTeam: u.pluginMeta.Team, | ||
| PluginKind: u.pluginMeta.Kind, | ||
| PluginName: u.pluginMeta.Name, | ||
| Rows: int(rows), | ||
| } | ||
|
|
||
| if len(tables) > 0 { | ||
| payload.Tables = &tables | ||
| } | ||
|
|
||
| resp, err := u.apiClient.IncreaseTeamPluginUsageWithResponse(ctx, u.teamName, payload) | ||
| if err == nil && resp.StatusCode() >= 200 && resp.StatusCode() < 300 { | ||
| u.logger.Debug().Str("url", u.url).Int("status_code", resp.StatusCode()).Uint32("rows", rows).Msg("usage updated") | ||
| u.lastUpdateTime = u.timeFunc().UTC() | ||
| if resp.HTTPResponse != nil { | ||
| u.updateConfigurationFromHeaders(resp.HTTPResponse.Header) | ||
| } | ||
| return nil | ||
| } | ||
| return fmt.Errorf("failed to update usage: max retries exceeded") | ||
|
|
||
| return fmt.Errorf("failed to update usage: %w", err) | ||
| } | ||
|
|
||
| // updateConfigurationFromHeaders updates the configuration based on the headers returned by the API | ||
|
|
@@ -663,33 +691,6 @@ func (u *BatchUpdater) updateConfigurationFromHeaders(header http.Header) { | |
| } | ||
| } | ||
|
|
||
| // calculateRetryDuration calculates the duration to sleep relative to the query start time before retrying an update | ||
| func (u *BatchUpdater) calculateRetryDuration(statusCode int, headers http.Header, queryStartTime time.Time, retry int) (time.Duration, error) { | ||
| if !retryableStatusCode(statusCode) { | ||
| return 0, fmt.Errorf("non-retryable status code: %d", statusCode) | ||
| } | ||
|
|
||
| // Check if we have a retry-after header | ||
| retryAfter := headers.Get("Retry-After") | ||
| if retryAfter != "" { | ||
| retryDelay, err := time.ParseDuration(retryAfter + "s") | ||
| if err != nil { | ||
| return 0, fmt.Errorf("failed to parse retry-after header: %w", err) | ||
| } | ||
| return retryDelay, nil | ||
| } | ||
|
|
||
| // Calculate exponential backoff | ||
| baseRetry := min(time.Duration(1<<retry)*time.Second, u.maxWaitTime) | ||
| jitter := time.Duration(rand.Intn(1000)) * time.Millisecond | ||
| retryDelay := baseRetry + jitter | ||
| return retryDelay - time.Since(queryStartTime), nil | ||
| } | ||
|
|
||
| func retryableStatusCode(statusCode int) bool { | ||
| return statusCode == http.StatusTooManyRequests || statusCode == http.StatusServiceUnavailable | ||
| } | ||
|
|
||
| func (u *BatchUpdater) getTeamNameByTokenType(tokenType auth.TokenType) (string, error) { | ||
| switch tokenType { | ||
| case auth.BearerToken: | ||
|
|
||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This means that retries can never be turned off?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maxRetries = 1 would be "single request, no retries" but I didn't want to be pedantic about the loop and the descriptions.