From 19ebc26109b2499514cf235b434d95b29272a56c Mon Sep 17 00:00:00 2001 From: Levon Ghukasyan Date: Wed, 8 May 2024 16:50:43 +0000 Subject: [PATCH 1/3] allow custom configs for s3 --- deeplake/core/storage/s3.py | 5 +++-- deeplake/util/storage.py | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/deeplake/core/storage/s3.py b/deeplake/core/storage/s3.py index abeb704186..ad852b1b7a 100644 --- a/deeplake/core/storage/s3.py +++ b/deeplake/core/storage/s3.py @@ -6,7 +6,7 @@ import botocore # type: ignore import posixpath import ssl -from typing import Dict, Optional, Tuple, Type +from typing import Dict, Optional, Tuple, Type, Any from datetime import datetime, timezone from botocore.session import ComponentLocator from deeplake.client.client import DeepLakeBackendClient @@ -96,6 +96,7 @@ def __init__( aws_region: Optional[str] = None, profile_name: Optional[str] = None, token: Optional[str] = None, + config: Optional[Dict[str, Any]] = None, **kwargs, ): """Initializes the S3Provider @@ -132,7 +133,7 @@ def __init__( self.tag: Optional[str] = None self.token: Optional[str] = token self.loaded_creds_from_environment = False - self.client_config = deeplake.config["s3"] + self.client_config = config self.start_time = time.time() self.profile_name = profile_name self._initialize_s3_parameters() diff --git a/deeplake/util/storage.py b/deeplake/util/storage.py index 6c0acdd505..4e00ac6f30 100644 --- a/deeplake/util/storage.py +++ b/deeplake/util/storage.py @@ -84,6 +84,7 @@ def storage_provider_from_path( session_token = creds.get("aws_session_token") endpoint_url = creds.get("endpoint_url") region = creds.get("aws_region") or creds.get("region") + config = creds.get("config", None) or deeplake.config["s3"] profile = creds.get("profile_name") storage = S3Provider( path, @@ -94,6 +95,7 @@ def storage_provider_from_path( region, profile_name=profile, token=token, + config=config, ) storage.creds_used = creds_used else: From 57fefddd3f63f94463bcb338230d060012b8d06c Mon Sep 17 00:00:00 2001 From: Levon Ghukasyan Date: Wed, 8 May 2024 17:34:11 +0000 Subject: [PATCH 2/3] fix linter --- deeplake/core/storage/s3.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deeplake/core/storage/s3.py b/deeplake/core/storage/s3.py index ad852b1b7a..45957e109a 100644 --- a/deeplake/core/storage/s3.py +++ b/deeplake/core/storage/s3.py @@ -96,7 +96,7 @@ def __init__( aws_region: Optional[str] = None, profile_name: Optional[str] = None, token: Optional[str] = None, - config: Optional[Dict[str, Any]] = None, + config: Optional[Any] = None, **kwargs, ): """Initializes the S3Provider @@ -119,6 +119,7 @@ def __init__( profile_name (str, optional): Specifies the AWS profile name to use. token (str, optional): Activeloop token, used for fetching credentials for Deep Lake datasets (if this is underlying storage for Deep Lake dataset). This is optional, tokens are normally autogenerated. + config (Any, Optional): s3 client configuration provided by the user. Defaults to None. **kwargs: Additional arguments to pass to the S3 client. Includes: ``expiration``. """ self.root = root From 4364acae4fa9856ff77abad10a5664a15e326b48 Mon Sep 17 00:00:00 2001 From: Levon Ghukasyan Date: Thu, 9 May 2024 16:30:51 +0000 Subject: [PATCH 3/3] changed get_object request based on discussion --- deeplake/core/storage/s3.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/deeplake/core/storage/s3.py b/deeplake/core/storage/s3.py index 45957e109a..4f0450840a 100644 --- a/deeplake/core/storage/s3.py +++ b/deeplake/core/storage/s3.py @@ -236,17 +236,16 @@ def __getitem__(self, path): def _get_bytes( self, path, start_byte: Optional[int] = None, end_byte: Optional[int] = None ): + range_kwarg = {} if start_byte is not None and end_byte is not None: if start_byte == end_byte: return b"" - range = f"bytes={start_byte}-{end_byte - 1}" + range_kwarg["Range"] = f"bytes={start_byte}-{end_byte - 1}" elif start_byte is not None: - range = f"bytes={start_byte}-" + range_kwarg["Range"] = f"bytes={start_byte}-" elif end_byte is not None: - range = f"bytes=0-{end_byte - 1}" - else: - range = "" - resp = self.client.get_object(Bucket=self.bucket, Key=path, Range=range) + range_kwarg["Range"] = f"bytes=0-{end_byte - 1}" + resp = self.client.get_object(Bucket=self.bucket, Key=path, **range_kwarg) return resp["Body"].read() def get_bytes(