@@ -127,7 +127,8 @@ func (c *Client) insertJob(ctx context.Context, job *bq.Job, media io.Reader) (*
127
127
// have to read the contents and keep it in memory, and that could be expensive.
128
128
// TODO(jba): Look into retrying if media != nil.
129
129
if job .JobReference != nil && media == nil {
130
- err = runWithRetry (ctx , invoke )
130
+ // Use the job-specific retry reasons instead of the defaults: BigQuery also wants structured internal job errors retried.
131
+ err = runWithRetryExplicit (ctx , invoke , jobRetryReasons )
131
132
} else {
132
133
err = invoke ()
133
134
}
@@ -152,7 +153,7 @@ func (c *Client) runQuery(ctx context.Context, queryRequest *bq.QueryRequest) (*
152
153
}
153
154
154
155
// We control request ID, so we can always retry (using the job-specific retry reasons).
155
- err = runWithRetry (ctx , invoke )
156
+ err = runWithRetryExplicit (ctx , invoke , jobRetryReasons )
156
157
if err != nil {
157
158
return nil , err
158
159
}
@@ -174,6 +175,10 @@ func unixMillisToTime(m int64) time.Time {
174
175
// See the similar function in ../storage/invoke.go. The main difference is the
175
176
// reason for retrying.
176
177
func runWithRetry (ctx context.Context , call func () error ) error {
178
+ return runWithRetryExplicit (ctx , call , defaultRetryReasons )
179
+ }
180
+
181
+ func runWithRetryExplicit (ctx context.Context , call func () error , allowedReasons []string ) error {
177
182
// These parameters match the suggestions in https://cloud.google.com/bigquery/sla.
178
183
backoff := gax.Backoff {
179
184
Initial : 1 * time .Second ,
@@ -185,15 +190,20 @@ func runWithRetry(ctx context.Context, call func() error) error {
185
190
if err == nil {
186
191
return true , nil
187
192
}
188
- return ! retryableError (err ), err
193
+ return ! retryableError (err , allowedReasons ), err
189
194
})
190
195
}
191
196
197
+ var (
198
+ defaultRetryReasons = []string {"backendError" , "rateLimitExceeded" }
199
+ jobRetryReasons = []string {"backendError" , "rateLimitExceeded" , "internalError" }
200
+ )
201
+
192
202
// This is the correct definition of retryable according to the BigQuery team. It
193
203
// also considers 502 ("Bad Gateway") and 503 ("Service Unavailable") errors
194
204
// retryable; these are returned by systems between the client and the BigQuery
195
205
// service.
196
- func retryableError (err error ) bool {
206
+ func retryableError (err error , allowedReasons [] string ) bool {
197
207
if err == nil {
198
208
return false
199
209
}
@@ -215,8 +225,13 @@ func retryableError(err error) bool {
215
225
var reason string
216
226
if len (e .Errors ) > 0 {
217
227
reason = e .Errors [0 ].Reason
228
+ for _ , r := range allowedReasons {
229
+ if reason == r {
230
+ return true
231
+ }
232
+ }
218
233
}
219
- if e .Code == http .StatusServiceUnavailable || e .Code == http .StatusBadGateway || reason == "backendError" || reason == "rateLimitExceeded" {
234
+ if e .Code == http .StatusServiceUnavailable || e .Code == http .StatusBadGateway {
220
235
return true
221
236
}
222
237
case * url.Error :
@@ -233,7 +248,7 @@ func retryableError(err error) bool {
233
248
}
234
249
// Unwrap is only supported in go1.13.x+
235
250
if e , ok := err .(interface { Unwrap () error }); ok {
236
- return retryableError (e .Unwrap ())
251
+ return retryableError (e .Unwrap (), allowedReasons )
237
252
}
238
253
return false
239
254
}
0 commit comments