From 8edc350f1bd724a79de772cc455052f700386226 Mon Sep 17 00:00:00 2001 From: activesoull Date: Sat, 27 Apr 2024 19:06:10 +0400 Subject: [PATCH] retry on get_object_size --- deeplake/core/storage/s3.py | 38 +++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/deeplake/core/storage/s3.py b/deeplake/core/storage/s3.py index 2b753821f4..abeb704186 100644 --- a/deeplake/core/storage/s3.py +++ b/deeplake/core/storage/s3.py @@ -635,11 +635,45 @@ def get_presigned_url(self, key, full=False): self._presigned_urls[path] = (url, time.time()) return url + def _get_object_size(self, path: str) -> int: + obj = self.resource.Object(self.bucket, path) + return obj.content_length + def get_object_size(self, path: str) -> int: self._check_update_creds() path = "".join((self.path, path)) - obj = self.resource.Object(self.bucket, path) - return obj.content_length + + try: + return self._get_object_size(path) + except botocore.exceptions.ClientError as err: + if err.response["Error"]["Code"] == "NoSuchKey": + raise KeyError(err) from err + if err.response["Error"]["Code"] == "InvalidAccessKeyId": + new_error_cls: Type[S3GetError] = S3GetAccessError + else: + new_error_cls = S3GetError + with S3ResetReloadCredentialsManager(self, new_error_cls): + return self._get_object_size(path) + except CONNECTION_ERRORS as err: + tries = self.num_tries + retry_wait = 0 + for i in range(1, tries + 1): + always_warn(f"Encountered connection error, retry {i} out of {tries}") + retry_wait = self._retry_wait_and_extend(retry_wait, err) + + try: + ret = self._get_object_size(path) + always_warn( + f"Connection re-established after {i} {['retries', 'retry'][i==1]}." + ) + return ret + except Exception: + pass + raise S3GetError(err) from err + except botocore.exceptions.NoCredentialsError as err: + raise S3GetAccessError from err + except Exception as err: + raise S3GetError(err) from err def get_object_from_full_url(self, url: str): root = url.replace("s3://", "")