Skip to content

[query] Unhandled transient error for GCS 503s #13937

@daniel-goldstein

Description

@daniel-goldstein

What happened?

The GCS client library throws a `StorageException: Unknown Error` when Google Cloud Storage responds with HTTP 503 (Service Unavailable), resulting in the stack trace below. A 503 is a transient error and should be retried gracefully.

Version

0.2.124

Relevant log output

hail.utils.java.FatalError: NullPointerException: null

Java stack trace:
is.hail.relocated.com.google.cloud.storage.StorageException: Unknown Error
	|> PUT https://storage.googleapis.com/upload/storage/v1/b/aou_analysis/o?name=250k/data/utils/aou_variant_qc_250k.ht/index/part-57205-e0113aa0-c1e8-43fc-af14-ccb68d989bd5.idx/index&uploadType=resumable&upload_id=ABPtcPrw7n_weAuHvL4cEyCdL-JKVVX-HaG7fnwAjTgRn4Uxm0JdIcWYasCHyuvK36Fc1UgVJkDC8kvlFgWcDkBcEy-_jxjQZpEFxJb2W8gLRkOavA
	|> content-range: bytes 0-50129/50130
	|> x-goog-gcs-idempotency-token: 5e36e53c-5dce-4690-844b-2cfd6f553861
	|  
	|< HTTP/1.1 503 Service Unavailable
	|< content-length: 0
	|< content-type: text/plain; charset=utf-8
	|< x-guploader-uploadid: ABPtcPrw7n_weAuHvL4cEyCdL-JKVVX-HaG7fnwAjTgRn4Uxm0JdIcWYasCHyuvK36Fc1UgVJkDC8kvlFgWcDkBcEy-_jxjQZpEFxJb2W8gLRkOavA
	|  
	at is.hail.relocated.com.google.cloud.storage.JsonResumableSessionFailureScenario.toStorageException(JsonResumableSessionFailureScenario.java:185)
	at is.hail.relocated.com.google.cloud.storage.JsonResumableSessionFailureScenario.toStorageException(JsonResumableSessionFailureScenario.java:117)
	at is.hail.relocated.com.google.cloud.storage.JsonResumableSessionFailureScenario.toStorageException(JsonResumableSessionFailureScenario.java:106)
	at is.hail.relocated.com.google.cloud.storage.JsonResumableSessionPutTask.call(JsonResumableSessionPutTask.java:224)
	at is.hail.relocated.com.google.cloud.storage.JsonResumableSession.lambda$put$0(JsonResumableSession.java:81)
	at is.hail.relocated.com.google.cloud.storage.Retrying.lambda$run$0(Retrying.java:102)
	at com.google.api.gax.retrying.DirectRetryingExecutor.submit(DirectRetryingExecutor.java:103)
	at is.hail.relocated.com.google.cloud.RetryHelper.run(RetryHelper.java:76)
	at is.hail.relocated.com.google.cloud.RetryHelper.runWithRetries(RetryHelper.java:50)
	at is.hail.relocated.com.google.cloud.storage.Retrying.run(Retrying.java:99)
	at is.hail.relocated.com.google.cloud.storage.JsonResumableSession.put(JsonResumableSession.java:68)
	at is.hail.relocated.com.google.cloud.storage.ApiaryUnbufferedWritableByteChannel.internalWrite(ApiaryUnbufferedWritableByteChannel.java:114)
	at is.hail.relocated.com.google.cloud.storage.ApiaryUnbufferedWritableByteChannel.writeAndClose(ApiaryUnbufferedWritableByteChannel.java:65)
	at is.hail.relocated.com.google.cloud.storage.UnbufferedWritableByteChannelSession$UnbufferedWritableByteChannel.writeAndClose(UnbufferedWritableByteChannelSession.java:40)
	at is.hail.relocated.com.google.cloud.storage.DefaultBufferedWritableByteChannel.close(DefaultBufferedWritableByteChannel.java:167)
	at is.hail.relocated.com.google.cloud.storage.StorageByteChannels$SynchronizedBufferedWritableByteChannel.close(StorageByteChannels.java:119)
	at is.hail.relocated.com.google.cloud.storage.StorageException.wrapIOException(StorageException.java:179)
	at is.hail.relocated.com.google.cloud.storage.BaseStorageWriteChannel.close(BaseStorageWriteChannel.java:84)
	at is.hail.io.fs.GoogleStorageFS$$anon$2.$anonfun$close$2(GoogleStorageFS.scala:312)
	at is.hail.io.fs.GoogleStorageFS$$anon$2.doHandlingRequesterPays(GoogleStorageFS.scala:282)
	at is.hail.io.fs.GoogleStorageFS$$anon$2.$anonfun$close$1(GoogleStorageFS.scala:312)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at is.hail.services.package$.retryTransientErrors(package.scala:182)
	at is.hail.io.fs.GoogleStorageFS$$anon$2.close(GoogleStorageFS.scala:310)
	at java.io.FilterOutputStream.close(FilterOutputStream.java:159)
	at is.hail.utils.richUtils.ByteTrackingOutputStream.close(ByteTrackingOutputStream.scala:23)
	at is.hail.io.index.IndexWriterUtils.close(IndexWriter.scala:225)
	at __C1756collect_distributed_array_table_native_writer.apply_region99_120(Unknown Source)
	at __C1756collect_distributed_array_table_native_writer.apply_region5_223(Unknown Source)
	at __C1756collect_distributed_array_table_native_writer.apply(Unknown Source)
	at __C1756collect_distributed_array_table_native_writer.apply(Unknown Source)
	at is.hail.backend.BackendUtils.$anonfun$collectDArray$16(BackendUtils.scala:91)
	at is.hail.utils.package$.using(package.scala:657)
	at is.hail.annotations.RegionPool.scopedRegion(RegionPool.scala:162)
	at is.hail.backend.BackendUtils.$anonfun$collectDArray$15(BackendUtils.scala:90)
	at is.hail.backend.service.Worker$.$anonfun$main$9(Worker.scala:172)
	at is.hail.services.package$.retryTransientErrors(package.scala:182)
	at is.hail.backend.service.Worker$.$anonfun$main$8(Worker.scala:171)
	at is.hail.utils.package$.using(package.scala:657)
	at is.hail.backend.service.Worker$.main(Worker.scala:169)
	at is.hail.backend.service.Main$.main(Main.scala:14)
	at is.hail.backend.service.Main.main(Main.scala)
	at sun.reflect.GeneratedMethodAccessor63.invoke(Unknown Source)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at is.hail.JVMEntryway$1.run(JVMEntryway.java:119)
	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:750)

java.lang.NullPointerException: null
	at is.hail.relocated.com.google.cloud.storage.JsonResumableSessionPutTask.call(JsonResumableSessionPutTask.java:201)
	at is.hail.relocated.com.google.cloud.storage.JsonResumableSession.lambda$put$0(JsonResumableSession.java:81)
	at is.hail.relocated.com.google.cloud.storage.Retrying.lambda$run$0(Retrying.java:102)
	at com.google.api.gax.retrying.DirectRetryingExecutor.submit(DirectRetryingExecutor.java:103)
	at is.hail.relocated.com.google.cloud.RetryHelper.run(RetryHelper.java:76)
	at is.hail.relocated.com.google.cloud.RetryHelper.runWithRetries(RetryHelper.java:50)
	at is.hail.relocated.com.google.cloud.storage.Retrying.run(Retrying.java:99)
	at is.hail.relocated.com.google.cloud.storage.JsonResumableSession.put(JsonResumableSession.java:68)
	at is.hail.relocated.com.google.cloud.storage.ApiaryUnbufferedWritableByteChannel.internalWrite(ApiaryUnbufferedWritableByteChannel.java:114)
	at is.hail.relocated.com.google.cloud.storage.ApiaryUnbufferedWritableByteChannel.writeAndClose(ApiaryUnbufferedWritableByteChannel.java:65)
	at is.hail.relocated.com.google.cloud.storage.UnbufferedWritableByteChannelSession$UnbufferedWritableByteChannel.writeAndClose(UnbufferedWritableByteChannelSession.java:40)
	at is.hail.relocated.com.google.cloud.storage.DefaultBufferedWritableByteChannel.close(DefaultBufferedWritableByteChannel.java:167)
	at is.hail.relocated.com.google.cloud.storage.StorageByteChannels$SynchronizedBufferedWritableByteChannel.close(StorageByteChannels.java:119)
	at is.hail.relocated.com.google.cloud.storage.StorageException.wrapIOException(StorageException.java:179)
	at is.hail.relocated.com.google.cloud.storage.BaseStorageWriteChannel.close(BaseStorageWriteChannel.java:84)
	at is.hail.io.fs.GoogleStorageFS$$anon$2.$anonfun$close$2(GoogleStorageFS.scala:312)
	at is.hail.io.fs.GoogleStorageFS$$anon$2.doHandlingRequesterPays(GoogleStorageFS.scala:282)
	at is.hail.io.fs.GoogleStorageFS$$anon$2.$anonfun$close$1(GoogleStorageFS.scala:312)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at is.hail.services.package$.retryTransientErrors(package.scala:182)
	at is.hail.io.fs.GoogleStorageFS$$anon$2.close(GoogleStorageFS.scala:310)
	at java.io.FilterOutputStream.close(FilterOutputStream.java:159)
	at is.hail.utils.richUtils.ByteTrackingOutputStream.close(ByteTrackingOutputStream.scala:23)
	at is.hail.io.index.IndexWriterUtils.close(IndexWriter.scala:225)
	at __C1756collect_distributed_array_table_native_writer.apply_region99_120(Unknown Source)
	at __C1756collect_distributed_array_table_native_writer.apply_region5_223(Unknown Source)
	at __C1756collect_distributed_array_table_native_writer.apply(Unknown Source)
	at __C1756collect_distributed_array_table_native_writer.apply(Unknown Source)
	at is.hail.backend.BackendUtils.$anonfun$collectDArray$16(BackendUtils.scala:91)
	at is.hail.utils.package$.using(package.scala:657)
	at is.hail.annotations.RegionPool.scopedRegion(RegionPool.scala:162)
	at is.hail.backend.BackendUtils.$anonfun$collectDArray$15(BackendUtils.scala:90)
	at is.hail.backend.service.Worker$.$anonfun$main$9(Worker.scala:172)
	at is.hail.services.package$.retryTransientErrors(package.scala:182)
	at is.hail.backend.service.Worker$.$anonfun$main$8(Worker.scala:171)
	at is.hail.utils.package$.using(package.scala:657)
	at is.hail.backend.service.Worker$.main(Worker.scala:169)
	at is.hail.backend.service.Main$.main(Main.scala:14)
	at is.hail.backend.service.Main.main(Main.scala)
	at sun.reflect.GeneratedMethodAccessor63.invoke(Unknown Source)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at is.hail.JVMEntryway$1.run(JVMEntryway.java:119)
	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:750)




Hail version: 0.2.125-6e6f46797aed
Error summary: NullPointerException: null

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions