Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow custom configs for s3 #2849

Merged
merged 4 commits into from
May 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 9 additions & 8 deletions deeplake/core/storage/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import botocore # type: ignore
import posixpath
import ssl
from typing import Dict, Optional, Tuple, Type
from typing import Dict, Optional, Tuple, Type, Any
from datetime import datetime, timezone
from botocore.session import ComponentLocator
from deeplake.client.client import DeepLakeBackendClient
Expand Down Expand Up @@ -96,6 +96,7 @@ def __init__(
aws_region: Optional[str] = None,
profile_name: Optional[str] = None,
token: Optional[str] = None,
config: Optional[Any] = None,
**kwargs,
):
"""Initializes the S3Provider
Expand All @@ -118,6 +119,7 @@ def __init__(
profile_name (str, optional): Specifies the AWS profile name to use.
token (str, optional): Activeloop token, used for fetching credentials for Deep Lake datasets (if this is underlying storage for Deep Lake dataset).
This is optional, tokens are normally autogenerated.
config (Any, optional): S3 client configuration provided by the user. Defaults to None.
**kwargs: Additional arguments to pass to the S3 client. Includes: ``expiration``.
"""
self.root = root
Expand All @@ -132,7 +134,7 @@ def __init__(
self.tag: Optional[str] = None
self.token: Optional[str] = token
self.loaded_creds_from_environment = False
self.client_config = deeplake.config["s3"]
self.client_config = config
self.start_time = time.time()
self.profile_name = profile_name
self._initialize_s3_parameters()
Expand Down Expand Up @@ -234,17 +236,16 @@ def __getitem__(self, path):
def _get_bytes(
self, path, start_byte: Optional[int] = None, end_byte: Optional[int] = None
):
range_kwarg = {}
if start_byte is not None and end_byte is not None:
if start_byte == end_byte:
return b""
range = f"bytes={start_byte}-{end_byte - 1}"
range_kwarg["Range"] = f"bytes={start_byte}-{end_byte - 1}"
elif start_byte is not None:
range = f"bytes={start_byte}-"
range_kwarg["Range"] = f"bytes={start_byte}-"
elif end_byte is not None:
range = f"bytes=0-{end_byte - 1}"
else:
range = ""
resp = self.client.get_object(Bucket=self.bucket, Key=path, Range=range)
range_kwarg["Range"] = f"bytes=0-{end_byte - 1}"
resp = self.client.get_object(Bucket=self.bucket, Key=path, **range_kwarg)
return resp["Body"].read()

def get_bytes(
Expand Down
2 changes: 2 additions & 0 deletions deeplake/util/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def storage_provider_from_path(
session_token = creds.get("aws_session_token")
endpoint_url = creds.get("endpoint_url")
region = creds.get("aws_region") or creds.get("region")
config = creds.get("config", None) or deeplake.config["s3"]
profile = creds.get("profile_name")
storage = S3Provider(
path,
Expand All @@ -94,6 +95,7 @@ def storage_provider_from_path(
region,
profile_name=profile,
token=token,
config=config,
)
storage.creds_used = creds_used
else:
Expand Down