Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENHANCEMENT] Add possibility to pass boto3 configuration to TupleS3StoreBackend (Thanks for #1691 to @mgorsk1!) #2371

1 change: 1 addition & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Changelog
Develop
-----------------
* [DOCS] How to load a Pandas DataFrame as a Batch #2327
* [ENHANCEMENT] Add possibility to pass boto3 configuration to TupleS3StoreBackend (Thanks for #1691 to @mgorsk1!) #2371


0.13.8
Expand Down
90 changes: 52 additions & 38 deletions great_expectations/data_context/store/tuple_store_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,7 @@ def __init__(
self,
bucket,
prefix="",
boto3_options=None,
filepath_template=None,
filepath_prefix=None,
filepath_suffix=None,
Expand Down Expand Up @@ -470,6 +471,9 @@ def __init__(
# whether the rest of the key is built with platform-specific separators or not
prefix = prefix.strip("/")
self.prefix = prefix
if boto3_options is None:
boto3_options = {}
self._boto3_options = boto3_options
self.endpoint_url = endpoint_url
# Initialize with store_backend_id if not part of an HTMLSiteStore
if not self._suppress_store_backend_id:
Expand Down Expand Up @@ -503,12 +507,10 @@ def _build_s3_object_key(self, key):
return s3_object_key

def _get(self, key):
import boto3

s3 = boto3.client("s3", endpoint_url=self.endpoint_url)

s3_object_key = self._build_s3_object_key(key)

s3 = self._create_client()

try:
s3_response_object = s3.get_object(Bucket=self.bucket, Key=s3_object_key)
except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket):
Expand All @@ -525,12 +527,10 @@ def _get(self, key):
def _set(
self, key, value, content_encoding="utf-8", content_type="application/json"
):
import boto3

s3 = boto3.resource("s3", endpoint_url=self.endpoint_url)

s3_object_key = self._build_s3_object_key(key)

s3 = self._create_resource()

try:
result_s3 = s3.Object(self.bucket, s3_object_key)
if isinstance(value, str):
Expand All @@ -548,9 +548,7 @@ def _set(
return s3_object_key

def _move(self, source_key, dest_key, **kwargs):
import boto3

s3 = boto3.resource("s3", endpoint_url=self.endpoint_url)
s3 = self._create_resource()

source_filepath = self._convert_key_to_filepath(source_key)
if not source_filepath.startswith(self.prefix):
Expand All @@ -566,11 +564,7 @@ def _move(self, source_key, dest_key, **kwargs):
s3.Object(self.bucket, source_filepath).delete()

def list_keys(self):
key_list = []

import boto3

s3 = boto3.client("s3", endpoint_url=self.endpoint_url)
s3 = self._create_client()
paginator = s3.get_paginator("list_objects_v2")

if self.prefix:
Expand All @@ -596,6 +590,7 @@ def list_keys(self):
if current_page_contents is not None:
objects.extend(current_page_contents)

key_list = []
for s3_object_info in objects:
s3_object_key = s3_object_info["Key"]
if self.platform_specific_separator:
Expand All @@ -622,24 +617,21 @@ def list_keys(self):
return key_list

def get_url_for_key(self, key, protocol=None):
import boto3

location = boto3.client(
"s3", endpoint_url=self.endpoint_url
).get_bucket_location(Bucket=self.bucket)["LocationConstraint"]
if location is None:
location = "s3"
location = self._create_client().get_bucket_location(Bucket=self.bucket)[
"LocationConstraint"
]
if self.boto3_options.get("endpoint_url"):
location = self.boto3_options.get("endpoint_url")
elif location is None:
location = "https://s3.amazonaws.com"
else:
location = "s3-" + location
location = "https://s3-" + location + ".amazonaws.com"

s3_key = self._convert_key_to_filepath(key)

if not self.prefix:
url = f"https://{location}.amazonaws.com/{self.bucket}/{s3_key}"
else:
url = (
f"https://{location}.amazonaws.com/{self.bucket}/{self.prefix}/{s3_key}"
)
return url
return f"{location}/{self.bucket}/{s3_key}"
return f"{location}/{self.bucket}/{self.prefix}/{s3_key}"

def get_public_url_for_key(self, key, protocol=None):
if not self.base_public_path:
Expand All @@ -657,13 +649,12 @@ def get_public_url_for_key(self, key, protocol=None):
return public_url

def remove_key(self, key):
import boto3
from botocore.exceptions import ClientError

if not isinstance(key, tuple):
key = key.to_tuple()

s3 = boto3.resource("s3", endpoint_url=self.endpoint_url)
s3 = self._create_resource()
s3_object_key = self._build_s3_object_key(key)
s3.Object(self.bucket, s3_object_key).delete()
if s3_object_key:
Expand All @@ -673,13 +664,14 @@ def remove_key(self, key):
Bucket=self.bucket, Prefix=self.prefix
)

delete_keys = {"Objects": []}
delete_keys["Objects"] = [
{"Key": k}
for k in [
obj["Key"] for obj in objects_to_delete.get("Contents", [])
delete_keys = {
"Objects": [
{"Key": k}
for k in [
obj["Key"] for obj in objects_to_delete.get("Contents", [])
]
]
]
}
s3.meta.client.delete_objects(Bucket=self.bucket, Delete=delete_keys)
return True
except ClientError as e:
Expand All @@ -691,6 +683,28 @@ def _has_key(self, key):
all_keys = self.list_keys()
return key in all_keys

@property
def boto3_options(self):
from botocore.client import Config

result = {}
if self._boto3_options.get("signature_version"):
signature_version = self._boto3_options.pop("signature_version")
result["config"] = Config(signature_version=signature_version)
result.update(self._boto3_options)

return result

def _create_client(self):
import boto3

return boto3.client("s3", **self.boto3_options)

def _create_resource(self):
import boto3

return boto3.resource("s3", **self.boto3_options)

@property
def config(self) -> dict:
return self._config
Expand Down