diff --git a/deeplake/core/storage/s3.py b/deeplake/core/storage/s3.py index abeb704186..4f0450840a 100644 --- a/deeplake/core/storage/s3.py +++ b/deeplake/core/storage/s3.py @@ -6,7 +6,7 @@ import botocore # type: ignore import posixpath import ssl -from typing import Dict, Optional, Tuple, Type +from typing import Dict, Optional, Tuple, Type, Any from datetime import datetime, timezone from botocore.session import ComponentLocator from deeplake.client.client import DeepLakeBackendClient @@ -96,6 +96,7 @@ def __init__( aws_region: Optional[str] = None, profile_name: Optional[str] = None, token: Optional[str] = None, + config: Optional[Any] = None, **kwargs, ): """Initializes the S3Provider @@ -118,6 +119,7 @@ def __init__( profile_name (str, optional): Specifies the AWS profile name to use. token (str, optional): Activeloop token, used for fetching credentials for Deep Lake datasets (if this is underlying storage for Deep Lake dataset). This is optional, tokens are normally autogenerated. + config (Any, Optional): s3 client configuration provided by the user. Defaults to None. **kwargs: Additional arguments to pass to the S3 client. Includes: ``expiration``. """ self.root = root @@ -132,7 +134,7 @@ def __init__( self.tag: Optional[str] = None self.token: Optional[str] = token self.loaded_creds_from_environment = False - self.client_config = deeplake.config["s3"] + self.client_config = config self.start_time = time.time() self.profile_name = profile_name self._initialize_s3_parameters() @@ -234,17 +236,16 @@ def __getitem__(self, path): def _get_bytes( self, path, start_byte: Optional[int] = None, end_byte: Optional[int] = None ): + range_kwarg = {} if start_byte is not None and end_byte is not None: if start_byte == end_byte: return b"" - range = f"bytes={start_byte}-{end_byte - 1}" + range_kwarg["Range"] = f"bytes={start_byte}-{end_byte - 1}" elif start_byte is not None: - range = f"bytes={start_byte}-" + range_kwarg["Range"] = f"bytes={start_byte}-" elif end_byte is not None: - range = f"bytes=0-{end_byte - 1}" - else: - range = "" - resp = self.client.get_object(Bucket=self.bucket, Key=path, Range=range) + range_kwarg["Range"] = f"bytes=0-{end_byte - 1}" + resp = self.client.get_object(Bucket=self.bucket, Key=path, **range_kwarg) return resp["Body"].read() def get_bytes( diff --git a/deeplake/util/storage.py b/deeplake/util/storage.py index 6c0acdd505..4e00ac6f30 100644 --- a/deeplake/util/storage.py +++ b/deeplake/util/storage.py @@ -84,6 +84,7 @@ def storage_provider_from_path( session_token = creds.get("aws_session_token") endpoint_url = creds.get("endpoint_url") region = creds.get("aws_region") or creds.get("region") + config = creds.get("config", None) or deeplake.config["s3"] profile = creds.get("profile_name") storage = S3Provider( path, @@ -94,6 +95,7 @@ def storage_provider_from_path( region, profile_name=profile, token=token, + config=config, ) storage.creds_used = creds_used else: