diff --git a/awswrangler/_config.py b/awswrangler/_config.py index 859d60e0a..852ea762a 100644 --- a/awswrangler/_config.py +++ b/awswrangler/_config.py @@ -29,6 +29,8 @@ class _ConfigArg(NamedTuple): "database": _ConfigArg(dtype=str, nullable=True), "max_cache_query_inspections": _ConfigArg(dtype=int, nullable=False), "max_cache_seconds": _ConfigArg(dtype=int, nullable=False), + "max_remote_cache_entries": _ConfigArg(dtype=int, nullable=False), + "max_local_cache_entries": _ConfigArg(dtype=int, nullable=False), "s3_block_size": _ConfigArg(dtype=int, nullable=False, enforced=True), "workgroup": _ConfigArg(dtype=str, nullable=False, enforced=True), # Endpoints URLs @@ -226,6 +228,35 @@ def max_cache_seconds(self) -> int: def max_cache_seconds(self, value: int) -> None: self._set_config_value(key="max_cache_seconds", value=value) + @property + def max_local_cache_entries(self) -> int: + """Property max_local_cache_entries.""" + return cast(int, self["max_local_cache_entries"]) + + @max_local_cache_entries.setter + def max_local_cache_entries(self, value: int) -> None: + try: + max_remote_cache_entries = cast(int, self["max_remote_cache_entries"]) + except AttributeError: + max_remote_cache_entries = 50 + if value < max_remote_cache_entries: + _logger.warning( + "max_remote_cache_entries shouldn't be greater than max_local_cache_entries. " + "Therefore max_remote_cache_entries will be set to %s as well.", + value, + ) + self._set_config_value(key="max_remote_cache_entries", value=value) + self._set_config_value(key="max_local_cache_entries", value=value) + + @property + def max_remote_cache_entries(self) -> int: + """Property max_remote_cache_entries.""" + return cast(int, self["max_remote_cache_entries"]) + + @max_remote_cache_entries.setter + def max_remote_cache_entries(self, value: int) -> None: + self._set_config_value(key="max_remote_cache_entries", value=value) + @property def s3_block_size(self) -> int: """Property s3_block_size.""" diff --git a/awswrangler/athena/_read.py b/awswrangler/athena/_read.py index 0d06c2ae8..97276e3ae 100644 --- a/awswrangler/athena/_read.py +++ b/awswrangler/athena/_read.py @@ -20,6 +20,7 @@ _get_query_metadata, _get_s3_output, _get_workgroup_config, + _LocalMetadataCacheManager, _QueryMetadata, _start_query_execution, _WorkGroupConfig, @@ -96,33 +97,37 @@ def _compare_query_string(sql: str, other: str) -> bool: return False -def _get_last_query_executions( - boto3_session: Optional[boto3.Session] = None, workgroup: Optional[str] = None -) -> Iterator[List[Dict[str, Any]]]: +def _get_last_query_infos( + max_remote_cache_entries: int, + boto3_session: Optional[boto3.Session] = None, + workgroup: Optional[str] = None, +) -> List[Dict[str, Any]]: """Return an iterator of `query_execution_info`s run by the workgroup in Athena.""" client_athena: boto3.client = _utils.client(service_name="athena", session=boto3_session) - args: Dict[str, Union[str, Dict[str, int]]] = {"PaginationConfig": {"MaxItems": 50, "PageSize": 50}} + page_size = 50 + args: Dict[str, Union[str, Dict[str, int]]] = { + "PaginationConfig": {"MaxItems": max_remote_cache_entries, "PageSize": page_size} + } if workgroup is not None: args["WorkGroup"] = workgroup paginator = client_athena.get_paginator("list_query_executions") + uncached_ids = [] for page in paginator.paginate(**args): _logger.debug("paginating Athena's queries history...") query_execution_id_list: List[str] = page["QueryExecutionIds"] - execution_data = 
client_athena.batch_get_query_execution(QueryExecutionIds=query_execution_id_list) - yield execution_data.get("QueryExecutions") - - -def _sort_successful_executions_data(query_executions: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """ - Sorts `_get_last_query_executions`'s results based on query Completion DateTime. - - This is useful to guarantee LRU caching rules. - """ - filtered: List[Dict[str, Any]] = [] - for query in query_executions: - if (query["Status"].get("State") == "SUCCEEDED") and (query.get("StatementType") in ["DDL", "DML"]): - filtered.append(query) - return sorted(filtered, key=lambda e: str(e["Status"]["CompletionDateTime"]), reverse=True) + for query_execution_id in query_execution_id_list: + if query_execution_id not in _cache_manager: + uncached_ids.append(query_execution_id) + if uncached_ids: + new_execution_data = [] + for i in range(0, len(uncached_ids), page_size): + new_execution_data.extend( + client_athena.batch_get_query_execution(QueryExecutionIds=uncached_ids[i : i + page_size]).get( + "QueryExecutions" + ) + ) + _cache_manager.update_cache(new_execution_data) + return _cache_manager.sorted_successful_generator() def _parse_select_query_from_possible_ctas(possible_ctas: str) -> Optional[str]: @@ -150,6 +155,7 @@ def _check_for_cached_results( workgroup: Optional[str], max_cache_seconds: int, max_cache_query_inspections: int, + max_remote_cache_entries: int, ) -> _CacheInfo: """ Check whether `sql` has been run before, within the `max_cache_seconds` window, by the `workgroup`. @@ -162,45 +168,41 @@ def _check_for_cached_results( comparable_sql: str = _prepare_query_string_for_comparison(sql) current_timestamp: datetime.datetime = datetime.datetime.now(datetime.timezone.utc) _logger.debug("current_timestamp: %s", current_timestamp) - for query_executions in _get_last_query_executions(boto3_session=boto3_session, workgroup=workgroup): - _logger.debug("len(query_executions): %s", len(query_executions)) - cached_queries: List[Dict[str, Any]] = _sort_successful_executions_data(query_executions=query_executions) - _logger.debug("len(cached_queries): %s", len(cached_queries)) - for query_info in cached_queries: - query_execution_id: str = query_info["QueryExecutionId"] - query_timestamp: datetime.datetime = query_info["Status"]["CompletionDateTime"] - _logger.debug("query_timestamp: %s", query_timestamp) - - if (current_timestamp - query_timestamp).total_seconds() > max_cache_seconds: - return _CacheInfo( - has_valid_cache=False, query_execution_id=query_execution_id, query_execution_payload=query_info - ) - - statement_type: Optional[str] = query_info.get("StatementType") - if statement_type == "DDL" and query_info["Query"].startswith("CREATE TABLE"): - parsed_query: Optional[str] = _parse_select_query_from_possible_ctas(possible_ctas=query_info["Query"]) - if parsed_query is not None: - if _compare_query_string(sql=comparable_sql, other=parsed_query): - return _CacheInfo( - has_valid_cache=True, - file_format="parquet", - query_execution_id=query_execution_id, - query_execution_payload=query_info, - ) - elif statement_type == "DML" and not query_info["Query"].startswith("INSERT"): - if _compare_query_string(sql=comparable_sql, other=query_info["Query"]): + for query_info in _get_last_query_infos( + max_remote_cache_entries=max_remote_cache_entries, + boto3_session=boto3_session, + workgroup=workgroup, + ): + query_execution_id: str = query_info["QueryExecutionId"] + query_timestamp: datetime.datetime = query_info["Status"]["CompletionDateTime"] + 
_logger.debug("query_timestamp: %s", query_timestamp) + if (current_timestamp - query_timestamp).total_seconds() > max_cache_seconds: + return _CacheInfo( + has_valid_cache=False, query_execution_id=query_execution_id, query_execution_payload=query_info + ) + statement_type: Optional[str] = query_info.get("StatementType") + if statement_type == "DDL" and query_info["Query"].startswith("CREATE TABLE"): + parsed_query: Optional[str] = _parse_select_query_from_possible_ctas(possible_ctas=query_info["Query"]) + if parsed_query is not None: + if _compare_query_string(sql=comparable_sql, other=parsed_query): return _CacheInfo( has_valid_cache=True, - file_format="csv", + file_format="parquet", query_execution_id=query_execution_id, query_execution_payload=query_info, ) - - num_executions_inspected += 1 - _logger.debug("num_executions_inspected: %s", num_executions_inspected) - if num_executions_inspected >= max_cache_query_inspections: - return _CacheInfo(has_valid_cache=False) - + elif statement_type == "DML" and not query_info["Query"].startswith("INSERT"): + if _compare_query_string(sql=comparable_sql, other=query_info["Query"]): + return _CacheInfo( + has_valid_cache=True, + file_format="csv", + query_execution_id=query_execution_id, + query_execution_payload=query_info, + ) + num_executions_inspected += 1 + _logger.debug("num_executions_inspected: %s", num_executions_inspected) + if num_executions_inspected >= max_cache_query_inspections: + return _CacheInfo(has_valid_cache=False) return _CacheInfo(has_valid_cache=False) @@ -302,6 +304,7 @@ def _resolve_query_with_cache( boto3_session=session, categories=categories, query_execution_payload=cache_info.query_execution_payload, + metadata_cache_manager=_cache_manager, ) if cache_info.file_format == "parquet": return _fetch_parquet_result( @@ -380,6 +383,7 @@ def _resolve_query_without_cache_ctas( query_execution_id=query_id, boto3_session=boto3_session, categories=categories, + metadata_cache_manager=_cache_manager, ) except exceptions.QueryFailed as ex: msg: str = str(ex) @@ -439,6 +443,7 @@ def _resolve_query_without_cache_regular( query_execution_id=query_id, boto3_session=boto3_session, categories=categories, + metadata_cache_manager=_cache_manager, ) return _fetch_csv_result( query_metadata=query_metadata, @@ -532,6 +537,8 @@ def read_sql_query( boto3_session: Optional[boto3.Session] = None, max_cache_seconds: int = 0, max_cache_query_inspections: int = 50, + max_remote_cache_entries: int = 50, + max_local_cache_entries: int = 100, data_source: Optional[str] = None, params: Optional[Dict[str, Any]] = None, ) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]: @@ -678,6 +685,15 @@ def read_sql_query( Max number of queries that will be inspected from the history to try to find some result to reuse. The bigger the number of inspection, the bigger will be the latency for not cached queries. Only takes effect if max_cache_seconds > 0. + max_remote_cache_entries : int + Max number of queries that will be retrieved from AWS for cache inspection. + The bigger the number of inspection, the bigger will be the latency for not cached queries. + Only takes effect if max_cache_seconds > 0 and default value is 50. + max_local_cache_entries : int + Max number of queries for which metadata will be cached locally. This will reduce the latency and also + enables keeping more than `max_remote_cache_entries` available for the cache. This value should not be + smaller than max_remote_cache_entries. 
+ Only takes effect if max_cache_seconds > 0 and default value is 100. data_source : str, optional Data Source / Catalog name. If None, 'AwsDataCatalog' will be used by default. params: Dict[str, any], optional @@ -718,12 +734,17 @@ def read_sql_query( for key, value in params.items(): sql = sql.replace(f":{key};", str(value)) + if max_remote_cache_entries > max_local_cache_entries: + max_remote_cache_entries = max_local_cache_entries + + _cache_manager.max_cache_size = max_local_cache_entries cache_info: _CacheInfo = _check_for_cached_results( sql=sql, boto3_session=session, workgroup=workgroup, max_cache_seconds=max_cache_seconds, max_cache_query_inspections=max_cache_query_inspections, + max_remote_cache_entries=max_remote_cache_entries, ) _logger.debug("cache_info:\n%s", cache_info) if cache_info.has_valid_cache is True: @@ -774,6 +795,8 @@ def read_sql_table( boto3_session: Optional[boto3.Session] = None, max_cache_seconds: int = 0, max_cache_query_inspections: int = 50, + max_remote_cache_entries: int = 50, + max_local_cache_entries: int = 100, data_source: Optional[str] = None, ) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]: """Extract the full table AWS Athena and return the results as a Pandas DataFrame. @@ -914,6 +937,15 @@ def read_sql_table( Max number of queries that will be inspected from the history to try to find some result to reuse. The bigger the number of inspection, the bigger will be the latency for not cached queries. Only takes effect if max_cache_seconds > 0. + max_remote_cache_entries : int + Max number of queries that will be retrieved from AWS for cache inspection. + The bigger the number of inspection, the bigger will be the latency for not cached queries. + Only takes effect if max_cache_seconds > 0 and default value is 50. + max_local_cache_entries : int + Max number of queries for which metadata will be cached locally. This will reduce the latency and also + enables keeping more than `max_remote_cache_entries` available for the cache. This value should not be + smaller than max_remote_cache_entries. + Only takes effect if max_cache_seconds > 0 and default value is 100. data_source : str, optional Data Source / Catalog name. If None, 'AwsDataCatalog' will be used by default. 
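# A minimal usage sketch for the two new cache parameters documented above
# (max_remote_cache_entries / max_local_cache_entries); the database and
# table names below are placeholders, not part of the patch.
import awswrangler as wr

df = wr.athena.read_sql_query(
    "SELECT * FROM my_table",         # hypothetical table
    database="default",               # hypothetical Glue database
    max_cache_seconds=900,            # caching only takes effect when > 0
    max_cache_query_inspections=500,
    max_remote_cache_entries=50,      # query executions fetched from Athena per cache check
    max_local_cache_entries=100,      # execution metadata entries kept in the process-local cache
)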
@@ -947,4 +979,9 @@ def read_sql_table( boto3_session=boto3_session, max_cache_seconds=max_cache_seconds, max_cache_query_inspections=max_cache_query_inspections, + max_remote_cache_entries=max_remote_cache_entries, + max_local_cache_entries=max_local_cache_entries, ) + + +_cache_manager = _LocalMetadataCacheManager() diff --git a/awswrangler/athena/_utils.py b/awswrangler/athena/_utils.py index da7a85d41..06d2c11c2 100644 --- a/awswrangler/athena/_utils.py +++ b/awswrangler/athena/_utils.py @@ -1,11 +1,13 @@ """Utilities Module for Amazon Athena.""" import csv +import datetime import logging import pprint import time import warnings from decimal import Decimal -from typing import Any, Dict, Generator, List, NamedTuple, Optional, Union, cast +from heapq import heappop, heappush +from typing import Any, Dict, Generator, List, NamedTuple, Optional, Tuple, Union, cast import boto3 import botocore.exceptions @@ -39,6 +41,71 @@ class _WorkGroupConfig(NamedTuple): kms_key: Optional[str] +class _LocalMetadataCacheManager: + def __init__(self) -> None: + self._cache: Dict[str, Any] = dict() + self._pqueue: List[Tuple[datetime.datetime, str]] = [] + self._max_cache_size = 100 + + def update_cache(self, items: List[Dict[str, Any]]) -> None: + """ + Update the local metadata cache with new query metadata. + + Parameters + ---------- + items : List[Dict[str, Any]] + List of query execution metadata which is returned by boto3 `batch_get_query_execution()`. + + Returns + ------- + None + None. + """ + if self._pqueue: + oldest_item = self._cache[self._pqueue[0][1]] + items = list( + filter(lambda x: x["Status"]["SubmissionDateTime"] > oldest_item["Status"]["SubmissionDateTime"], items) + ) + + cache_oversize = len(self._cache) + len(items) - self._max_cache_size + for _ in range(cache_oversize): + _, query_execution_id = heappop(self._pqueue) + del self._cache[query_execution_id] + + for item in items[: self._max_cache_size]: + heappush(self._pqueue, (item["Status"]["SubmissionDateTime"], item["QueryExecutionId"])) + self._cache[item["QueryExecutionId"]] = item + + def sorted_successful_generator(self) -> List[Dict[str, Any]]: + """ + Sorts the entries in the local cache based on query Completion DateTime. + + This is useful to guarantee LRU caching rules. + + Returns + ------- + List[Dict[str, Any]] + Returns successful DDL and DML queries sorted by query completion time. 
+ """ + filtered: List[Dict[str, Any]] = [] + for query in self._cache.values(): + if (query["Status"].get("State") == "SUCCEEDED") and (query.get("StatementType") in ["DDL", "DML"]): + filtered.append(query) + return sorted(filtered, key=lambda e: str(e["Status"]["CompletionDateTime"]), reverse=True) + + def __contains__(self, key: str) -> bool: + return key in self._cache + + @property + def max_cache_size(self) -> int: + """Property max_cache_size.""" + return self._max_cache_size + + @max_cache_size.setter + def max_cache_size(self, value: int) -> None: + self._max_cache_size = value + + def _get_s3_output(s3_output: Optional[str], wg_config: _WorkGroupConfig, boto3_session: boto3.Session) -> str: if wg_config.enforced and wg_config.s3_output is not None: return wg_config.s3_output @@ -171,6 +238,7 @@ def _get_query_metadata( # pylint: disable=too-many-statements boto3_session: boto3.Session, categories: Optional[List[str]] = None, query_execution_payload: Optional[Dict[str, Any]] = None, + metadata_cache_manager: Optional[_LocalMetadataCacheManager] = None, ) -> _QueryMetadata: """Get query metadata.""" if (query_execution_payload is not None) and (query_execution_payload["Status"]["State"] in _QUERY_FINAL_STATES): @@ -224,6 +292,8 @@ def _get_query_metadata( # pylint: disable=too-many-statements athena_statistics: Dict[str, Union[int, str]] = _query_execution_payload.get("Statistics", {}) manifest_location: Optional[str] = str(athena_statistics.get("DataManifestLocation")) + if metadata_cache_manager is not None and query_execution_id not in metadata_cache_manager: + metadata_cache_manager.update_cache(items=[_query_execution_payload]) query_metadata: _QueryMetadata = _QueryMetadata( execution_id=query_execution_id, dtype=dtype, diff --git a/tests/test_athena_cache.py b/tests/test_athena_cache.py index a95aa66b4..d4cb08830 100644 --- a/tests/test_athena_cache.py +++ b/tests/test_athena_cache.py @@ -127,3 +127,46 @@ def test_cache_query_semicolon(path, glue_database, glue_table): resolve_no_cache.assert_not_called() assert df.shape == df3.shape assert df.c0.sum() == df3.c0.sum() + + +def test_local_cache(path, glue_database, glue_table): + wr.config.max_local_cache_entries = 1 + + df = pd.DataFrame({"c0": [0, None]}, dtype="Int64") + wr.s3.to_parquet(df=df, path=path, dataset=True, mode="overwrite", database=glue_database, table=glue_table) + + with patch( + "awswrangler.athena._read._check_for_cached_results", + return_value=wr.athena._read._CacheInfo(has_valid_cache=False), + ) as mocked_cache_attempt: + df2 = wr.athena.read_sql_query( + f"SELECT * FROM {glue_table}", database=glue_database, ctas_approach=True, max_cache_seconds=0 + ) + mocked_cache_attempt.assert_called() + assert df.shape == df2.shape + assert df.c0.sum() == df2.c0.sum() + first_query_id = df2.query_metadata["QueryExecutionId"] + assert first_query_id in wr.athena._read._cache_manager + + df3 = wr.athena.read_sql_query( + f"SELECT * FROM {glue_table}", database=glue_database, ctas_approach=True, max_cache_seconds=0 + ) + mocked_cache_attempt.assert_called() + assert df.shape == df3.shape + assert df.c0.sum() == df3.c0.sum() + second_query_id = df3.query_metadata["QueryExecutionId"] + + assert first_query_id not in wr.athena._read._cache_manager + assert second_query_id in wr.athena._read._cache_manager + + +def test_paginated_remote_cache(path, glue_database, glue_table, workgroup1): + wr.config.max_remote_cache_entries = 100 + df = pd.DataFrame({"c0": [0, None]}, dtype="Int64") + wr.s3.to_parquet(df=df, 
path=path, dataset=True, mode="overwrite", database=glue_database, table=glue_table) + + df2 = wr.athena.read_sql_table( + glue_table, glue_database, ctas_approach=False, max_cache_seconds=1, workgroup=workgroup1 + ) + assert df.shape == df2.shape + assert df.c0.sum() == df2.c0.sum() diff --git a/tests/test_config.py b/tests/test_config.py index 5b4c94c65..5ccbb4800 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -116,3 +116,8 @@ def test_basics(path, glue_database, glue_table, workgroup0, workgroup1): os.environ["WR_GLUE_ENDPOINT_URL"] = f"https://glue.{region}.amazonaws.com" wr.config.reset() _urls_test(glue_database) + + +def test_athena_cache_configuration(): + wr.config.max_local_cache_entries = 20 + assert wr.config.max_remote_cache_entries == 20 diff --git a/tutorials/021 - Global Configurations.ipynb b/tutorials/021 - Global Configurations.ipynb index 816ec6549..745b7125c 100644 --- a/tutorials/021 - Global Configurations.ipynb +++ b/tutorials/021 - Global Configurations.ipynb @@ -29,10 +29,15 @@ "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ - "env: WR_DATABASE=default\nenv: WR_CTAS_APPROACH=False\nenv: WR_MAX_CACHE_SECONDS=900\nenv: WR_MAX_CACHE_QUERY_INSPECTIONS=500\n" + "env: WR_DATABASE=default\n", + "env: WR_CTAS_APPROACH=False\n", + "env: WR_MAX_CACHE_SECONDS=900\n", + "env: WR_MAX_CACHE_QUERY_INSPECTIONS=500\n", + "env: WR_MAX_REMOTE_CACHE_ENTRIES=50\n", + "env: WR_MAX_LOCAL_CACHE_ENTRIES=100\n" ] } ], @@ -40,7 +45,9 @@ "%env WR_DATABASE=default\n", "%env WR_CTAS_APPROACH=False\n", "%env WR_MAX_CACHE_SECONDS=900\n", - "%env WR_MAX_CACHE_QUERY_INSPECTIONS=500" + "%env WR_MAX_CACHE_QUERY_INSPECTIONS=500\n", + "%env WR_MAX_REMOTE_CACHE_ENTRIES=50\n", + "%env WR_MAX_LOCAL_CACHE_ENTRIES=100" ] }, { @@ -58,16 +65,13 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " foo\n", - "0 1" - ], - "text/html": "
   foo
0    1
" + "text/plain": " FOO\n0 1", + "text/html": "
   FOO
0    1
" }, + "execution_count": 3, "metadata": {}, - "execution_count": 3 + "output_type": "execute_result" } ], "source": [ @@ -109,7 +113,9 @@ "wr.config.database = \"default\"\n", "wr.config.ctas_approach = False\n", "wr.config.max_cache_seconds = 900\n", - "wr.config.max_cache_query_inspections = 500" + "wr.config.max_cache_query_inspections = 500\n", + "wr.config.max_remote_cache_entries = 50\n", + "wr.config.max_local_cache_entries = 100" ] }, { @@ -118,16 +124,13 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " foo\n", - "0 1" - ], - "text/html": "
   foo
0    1
" + "text/plain": " FOO\n0 1", + "text/html": "
   FOO
0    1
" }, + "execution_count": 6, "metadata": {}, - "execution_count": 6 + "output_type": "execute_result" } ], "source": [ @@ -147,15 +150,13 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - "" - ], - "text/html": "\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
name | Env. Variable | type | nullable | enforced | configured | value
0 | catalog_id | WR_CATALOG_ID | <class 'str'> | True | False | False | None
1 | concurrent_partitioning | WR_CONCURRENT_PARTITIONING | <class 'bool'> | False | False | False | None
2 | ctas_approach | WR_CTAS_APPROACH | <class 'bool'> | False | False | True | False
3 | database | WR_DATABASE | <class 'str'> | True | False | True | default
4 | max_cache_query_inspections | WR_MAX_CACHE_QUERY_INSPECTIONS | <class 'int'> | False | False | True | 500
5 | max_cache_seconds | WR_MAX_CACHE_SECONDS | <class 'int'> | False | False | True | 900
6 | s3_block_size | WR_S3_BLOCK_SIZE | <class 'int'> | False | True | False | None
7 | workgroup | WR_WORKGROUP | <class 'str'> | False | True | False | None
8 | s3_endpoint_url | WR_S3_ENDPOINT_URL | <class 'str'> | True | True | True | None
9 | athena_endpoint_url | WR_ATHENA_ENDPOINT_URL | <class 'str'> | True | True | True | None
10 | sts_endpoint_url | WR_STS_ENDPOINT_URL | <class 'str'> | True | True | True | None
11 | glue_endpoint_url | WR_GLUE_ENDPOINT_URL | <class 'str'> | True | True | True | None
12 | redshift_endpoint_url | WR_REDSHIFT_ENDPOINT_URL | <class 'str'> | True | True | True | None
13 | kms_endpoint_url | WR_KMS_ENDPOINT_URL | <class 'str'> | True | True | True | None
14 | emr_endpoint_url | WR_EMR_ENDPOINT_URL | <class 'str'> | True | True | True | None
" + "text/plain": "", + "text/html": "\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
name | Env. Variable | type | nullable | enforced | configured | value
0 | catalog_id | WR_CATALOG_ID | <class 'str'> | True | False | False | None
1 | concurrent_partitioning | WR_CONCURRENT_PARTITIONING | <class 'bool'> | False | False | False | None
2 | ctas_approach | WR_CTAS_APPROACH | <class 'bool'> | False | False | True | False
3 | database | WR_DATABASE | <class 'str'> | True | False | True | default
4 | max_cache_query_inspections | WR_MAX_CACHE_QUERY_INSPECTIONS | <class 'int'> | False | False | True | 500
5 | max_cache_seconds | WR_MAX_CACHE_SECONDS | <class 'int'> | False | False | True | 900
6 | max_remote_cache_entries | WR_MAX_REMOTE_CACHE_ENTRIES | <class 'int'> | False | False | True | 50
7 | max_local_cache_entries | WR_MAX_LOCAL_CACHE_ENTRIES | <class 'int'> | False | False | True | 100
8 | s3_block_size | WR_S3_BLOCK_SIZE | <class 'int'> | False | True | False | None
9 | workgroup | WR_WORKGROUP | <class 'str'> | False | True | False | None
10 | s3_endpoint_url | WR_S3_ENDPOINT_URL | <class 'str'> | True | True | True | None
11 | athena_endpoint_url | WR_ATHENA_ENDPOINT_URL | <class 'str'> | True | True | True | None
12 | sts_endpoint_url | WR_STS_ENDPOINT_URL | <class 'str'> | True | True | True | None
13 | glue_endpoint_url | WR_GLUE_ENDPOINT_URL | <class 'str'> | True | True | True | None
14 | redshift_endpoint_url | WR_REDSHIFT_ENDPOINT_URL | <class 'str'> | True | True | True | None
15 | kms_endpoint_url | WR_KMS_ENDPOINT_URL | <class 'str'> | True | True | True | None
16 | emr_endpoint_url | WR_EMR_ENDPOINT_URL | <class 'str'> | True | True | True | None
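The two new entries above (max_remote_cache_entries and max_local_cache_entries) can also be set through wr.config or environment variables, as the tutorial cells do; a minimal sketch, assuming the defaults introduced by this change:

import os
import awswrangler as wr

# Environment variables read when the configuration is (re)loaded, matching
# the tutorial's %env cells above.
os.environ["WR_MAX_REMOTE_CACHE_ENTRIES"] = "50"
os.environ["WR_MAX_LOCAL_CACHE_ENTRIES"] = "100"
wr.config.reset()

# Or set the properties directly.
wr.config.max_remote_cache_entries = 50
wr.config.max_local_cache_entries = 100

# Lowering max_local_cache_entries below the current max_remote_cache_entries
# also lowers max_remote_cache_entries, since the remote fetch should never
# exceed what the local cache can hold (see test_athena_cache_configuration).
wr.config.max_local_cache_entries = 20
assert wr.config.max_remote_cache_entries == 20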
" }, + "execution_count": 7, "metadata": {}, - "execution_count": 7 + "output_type": "execute_result" } ], "source": [ @@ -165,13 +166,9 @@ ], "metadata": { "kernelspec": { - "name": "Python 3.6.12 64-bit ('.venv': venv)", - "display_name": "Python 3.6.12 64-bit ('.venv': venv)", - "metadata": { - "interpreter": { - "hash": "655261d510271b2fa1eddf3c55861fc26682a337f3dd08c381abe2a293d3d6c9" - } - } + "name": "python3", + "language": "python", + "display_name": "Python 3" }, "language_info": { "codemirror_mode": {