From 2763c7805fc7265e4855146239aee6092828dbf4 Mon Sep 17 00:00:00 2001 From: Alex Sherstinsky Date: Mon, 1 Feb 2021 14:43:58 -0800 Subject: [PATCH 1/3] LINT --- examples/expectations/column_map_expectation_template.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/expectations/column_map_expectation_template.py b/examples/expectations/column_map_expectation_template.py index 6cdd2d1f3939..af9669a3cd7b 100644 --- a/examples/expectations/column_map_expectation_template.py +++ b/examples/expectations/column_map_expectation_template.py @@ -30,7 +30,7 @@ # This class defines a Metric to support your Expectation # For most Expectations, the main business logic for calculation will live here. -# To learn about the relationship between Metrics and Expectations, please visit +# To learn about the relationship between Metrics and Expectations, please visit # https://docs.greatexpectations.io/en/latest/reference/core_concepts.html#expectations-and-metrics. class ColumnValuesEqualThree(ColumnMapMetricProvider): @@ -107,7 +107,7 @@ class ExpectColumnValuesToEqualThree(ColumnMapExpectation): default_kwarg_values = {} # This method defines a question Renderer - # For more info on Renderers, see + # For more info on Renderers, see # https://docs.greatexpectations.io/en/latest/guides/how_to_guides/configuring_data_docs/how_to_create_renderers_for_custom_expectations.html #!!! This example renderer should render RenderedStringTemplateContent, not just a string From 5424884be632fd53975f37a47f1ba2fbfe1cde0c Mon Sep 17 00:00:00 2001 From: Alex Sherstinsky Date: Tue, 2 Feb 2021 09:09:33 -0800 Subject: [PATCH 2/3] Adding possibility to pass boto3_options to TupleS3StoreBackend -- thanks to PR 1691 by mgorsk1 (Community Contributor). --- .../data_context/store/tuple_store_backend.py | 90 +++++++++++-------- 1 file changed, 52 insertions(+), 38 deletions(-) diff --git a/great_expectations/data_context/store/tuple_store_backend.py b/great_expectations/data_context/store/tuple_store_backend.py index f286504f1a7e..895c7dd0bf31 100644 --- a/great_expectations/data_context/store/tuple_store_backend.py +++ b/great_expectations/data_context/store/tuple_store_backend.py @@ -437,6 +437,7 @@ def __init__( self, bucket, prefix="", + boto3_options=None, filepath_template=None, filepath_prefix=None, filepath_suffix=None, @@ -470,6 +471,9 @@ def __init__( # whether the rest of the key is built with platform-specific separators or not prefix = prefix.strip("/") self.prefix = prefix + if boto3_options is None: + boto3_options = {} + self._boto3_options = boto3_options self.endpoint_url = endpoint_url # Initialize with store_backend_id if not part of an HTMLSiteStore if not self._suppress_store_backend_id: @@ -503,12 +507,10 @@ def _build_s3_object_key(self, key): return s3_object_key def _get(self, key): - import boto3 - - s3 = boto3.client("s3", endpoint_url=self.endpoint_url) - s3_object_key = self._build_s3_object_key(key) + s3 = self._create_client() + try: s3_response_object = s3.get_object(Bucket=self.bucket, Key=s3_object_key) except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket): @@ -525,12 +527,10 @@ def _get(self, key): def _set( self, key, value, content_encoding="utf-8", content_type="application/json" ): - import boto3 - - s3 = boto3.resource("s3", endpoint_url=self.endpoint_url) - s3_object_key = self._build_s3_object_key(key) + s3 = self._create_resource() + try: result_s3 = s3.Object(self.bucket, s3_object_key) if isinstance(value, str): @@ -548,9 +548,7 @@ def _set( return s3_object_key def _move(self, source_key, dest_key, **kwargs): - import boto3 - - s3 = boto3.resource("s3", endpoint_url=self.endpoint_url) + s3 = self._create_resource() source_filepath = self._convert_key_to_filepath(source_key) if not source_filepath.startswith(self.prefix): @@ -566,11 +564,7 @@ def _move(self, source_key, dest_key, **kwargs): s3.Object(self.bucket, source_filepath).delete() def list_keys(self): - key_list = [] - - import boto3 - - s3 = boto3.client("s3", endpoint_url=self.endpoint_url) + s3 = self._create_client() paginator = s3.get_paginator("list_objects_v2") if self.prefix: @@ -596,6 +590,7 @@ def list_keys(self): if current_page_contents is not None: objects.extend(current_page_contents) + key_list = [] for s3_object_info in objects: s3_object_key = s3_object_info["Key"] if self.platform_specific_separator: @@ -622,24 +617,21 @@ def list_keys(self): return key_list def get_url_for_key(self, key, protocol=None): - import boto3 - - location = boto3.client( - "s3", endpoint_url=self.endpoint_url - ).get_bucket_location(Bucket=self.bucket)["LocationConstraint"] - if location is None: - location = "s3" + location = self._create_client().get_bucket_location(Bucket=self.bucket)[ + "LocationConstraint" + ] + if self.boto3_options.get("endpoint_url"): + location = self.boto3_options.get("endpoint_url") + elif location is None: + location = "https://s3.amazonaws.com" else: - location = "s3-" + location + location = "https://s3-" + location + ".amazonaws.com" + s3_key = self._convert_key_to_filepath(key) if not self.prefix: - url = f"https://{location}.amazonaws.com/{self.bucket}/{s3_key}" - else: - url = ( - f"https://{location}.amazonaws.com/{self.bucket}/{self.prefix}/{s3_key}" - ) - return url + return f"{location}/{self.bucket}/{s3_key}" + return f"{location}/{self.bucket}/{self.prefix}/{s3_key}" def get_public_url_for_key(self, key, protocol=None): if not self.base_public_path: @@ -657,13 +649,12 @@ def get_public_url_for_key(self, key, protocol=None): return public_url def remove_key(self, key): - import boto3 from botocore.exceptions import ClientError if not isinstance(key, tuple): key = key.to_tuple() - s3 = boto3.resource("s3", endpoint_url=self.endpoint_url) + s3 = self._create_resource() s3_object_key = self._build_s3_object_key(key) s3.Object(self.bucket, s3_object_key).delete() if s3_object_key: @@ -673,13 +664,14 @@ def remove_key(self, key): Bucket=self.bucket, Prefix=self.prefix ) - delete_keys = {"Objects": []} - delete_keys["Objects"] = [ - {"Key": k} - for k in [ - obj["Key"] for obj in objects_to_delete.get("Contents", []) + delete_keys = { + "Objects": [ + {"Key": k} + for k in [ + obj["Key"] for obj in objects_to_delete.get("Contents", []) + ] ] - ] + } s3.meta.client.delete_objects(Bucket=self.bucket, Delete=delete_keys) return True except ClientError as e: @@ -691,6 +683,28 @@ def _has_key(self, key): all_keys = self.list_keys() return key in all_keys + @property + def boto3_options(self): + from botocore.client import Config + + result = {} + if self._boto3_options.get("signature_version"): + signature_version = self._boto3_options.pop("signature_version") + result["config"] = Config(signature_version=signature_version) + result.update(self._boto3_options) + + return result + + def _create_client(self): + import boto3 + + return boto3.client("s3", **self.boto3_options) + + def _create_resource(self): + import boto3 + + return boto3.resource("s3", **self.boto3_options) + @property def config(self) -> dict: return self._config From fe1b3cddafd23ede2a0805ec91252c0da8111916 Mon Sep 17 00:00:00 2001 From: Alex Sherstinsky Date: Tue, 2 Feb 2021 09:12:26 -0800 Subject: [PATCH 3/3] PR note in changelog. --- docs/changelog.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 3b9f42996a8a..2e6d3d626bfc 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -8,6 +8,7 @@ Changelog Develop ----------------- * [DOCS] How to load a Pandas DataFrame as a Batch #2327 +* [ENHANCEMENT] Add possibility to pass boto3 configuration to TupleS3StoreBackend (Thanks for #1691 to @mgorsk1!) #2371 0.13.8