diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d3420ade6..e898ec21e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -215,6 +215,10 @@ or ``./deploy-base.sh`` ``./deploy-databases.sh`` +* [OPTIONAL] Deploy the Cloudformation template `opensearch.yaml` (if you need to test Amazon OpenSearch Service). This step could take about 15 minutes to deploy. + +``./deploy-opensearch.sh`` + * Go to the `EC2 -> SecurityGroups` console, open the `aws-data-wrangler-*` security group and configure to accept your IP from any TCP port. - Alternatively run: @@ -244,7 +248,7 @@ or ``pytest -n 8 tests/test_db.py`` -* To run all data lake test functions for all python versions (Only if Amazon QuickSight is activated): +* To run all data lake test functions for all python versions (Only if Amazon QuickSight is activated and Amazon OpenSearch template is deployed): ``./test.sh`` diff --git a/README.md b/README.md index 16ab96390..bed91146f 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,7 @@ FROM "sampleDB"."sampleTable" ORDER BY time DESC LIMIT 3 - [026 - Amazon Timestream](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/026%20-%20Amazon%20Timestream.ipynb) - [027 - Amazon Timestream 2](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/027%20-%20Amazon%20Timestream%202.ipynb) - [028 - Amazon DynamoDB](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/028%20-%20DynamoDB.ipynb) + - [031 - OpenSearch](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/031%20-%20OpenSearch.ipynb) - [**API Reference**](https://aws-data-wrangler.readthedocs.io/en/2.11.0/api.html) - [Amazon S3](https://aws-data-wrangler.readthedocs.io/en/2.11.0/api.html#amazon-s3) - [AWS Glue Catalog](https://aws-data-wrangler.readthedocs.io/en/2.11.0/api.html#aws-glue-catalog) diff --git a/awswrangler/__init__.py b/awswrangler/__init__.py index ee068e4f6..c87d36823 100644 --- a/awswrangler/__init__.py +++ b/awswrangler/__init__.py @@ -17,6 +17,7 @@ 
def _resolve_fields(row: Mapping[str, Any]) -> Mapping[str, Any]:
    """Flatten a nested document into a single-level mapping.

    Nested dictionaries are expanded recursively and their keys are joined
    with dots, e.g. ``{"a": {"b": 1}}`` becomes ``{"a.b": 1}``. Any value that
    is not a ``dict`` (lists included) is copied through unchanged.
    """
    fields: Dict[str, Any] = {}
    for field, value in row.items():
        if isinstance(value, dict):
            for n_field, val in _resolve_fields(value).items():
                fields[f"{field}.{n_field}"] = val
        else:
            fields[field] = value
    return fields


def _hit_to_row(hit: Mapping[str, Any]) -> Mapping[str, Any]:
    """Convert one search hit into a flat row.

    The ``_source`` document is flattened with ``_resolve_fields``; every other
    key starting with an underscore (``_id``, ``_score``, ...) is copied as is.
    Keys that do not start with an underscore are ignored.
    """
    row: Dict[str, Any] = {}
    for key, value in hit.items():
        if key == "_source":
            row.update(_resolve_fields(value))
        elif key.startswith("_"):
            row[key] = value
    return row


def _search_response_to_documents(response: Mapping[str, Any]) -> List[Mapping[str, Any]]:
    """Return the hits of a search response as a list of flat rows.

    A response without ``hits`` (for example when ``filter_path`` removed the
    section because nothing matched) yields an empty list instead of raising
    ``KeyError``.
    """
    if not response:
        return []
    hits = (response.get("hits") or {}).get("hits") or []
    return [_hit_to_row(hit) for hit in hits]


def _search_response_to_df(response: Union[Mapping[str, Any], Any]) -> pd.DataFrame:
    """Build a DataFrame with one row per hit of a search response."""
    return pd.DataFrame(_search_response_to_documents(response))
def search_by_sql(client: OpenSearch, sql_query: str, **kwargs: Any) -> pd.DataFrame:
    """Return results matching [SQL query](https://opensearch.org/docs/search-plugins/sql/index/) as pandas dataframe.

    Parameters
    ----------
    client : OpenSearch
        instance of opensearchpy.OpenSearch to use.
    sql_query : str
        SQL query
    **kwargs :
        KEYWORD arguments forwarded to request url (e.g.: filter_path, etc.)
        ``size`` and ``fetch_size`` are not sent as URL parameters: they are moved into the
        request body as ``fetch_size`` (``fetch_size`` wins when both are given).
        ``format`` is always set to ``json``.

    Returns
    -------
    pandas.DataFrame
        Results as Pandas DataFrame

    Examples
    --------
    Searching an index using SQL query

    >>> import awswrangler as wr
    >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')
    >>> df = wr.opensearch.search_by_sql(
    ...         client=client,
    ...         sql_query='SELECT * FROM my-index LIMIT 50'
    ... )


    """
    # The SQL plugin endpoint differs between OpenSearch and Open Distro for Elasticsearch.
    if _get_distribution(client) == "opensearch":
        url = "/_plugins/_sql"
    else:
        url = "/_opendistro/_sql"

    kwargs["format"] = "json"
    body: Dict[str, Any] = {"query": sql_query}
    for size_att in ("size", "fetch_size"):
        if size_att in kwargs:
            # unrecognized as a URL parameter, so it travels in the body instead
            body["fetch_size"] = kwargs.pop(size_att)
    response = client.transport.perform_request(
        "POST", url, headers={"Content-Type": "application/json"}, body=body, params=kwargs
    )
    return _search_response_to_df(response)
def _strip_endpoint(endpoint: str) -> str:
    """Remove the ``http://`` / ``https://`` scheme plus surrounding whitespace and slashes."""
    uri_schema = re.compile(r"https?://")
    return uri_schema.sub("", endpoint).strip().strip("/")


def connect(
    host: str,
    port: Optional[int] = 443,
    boto3_session: Optional[boto3.Session] = None,
    region: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
) -> OpenSearch:
    """Create a secure connection to the specified Amazon OpenSearch domain.

    Note
    ----
    We use [opensearch-py](https://github.com/opensearch-project/opensearch-py), an OpenSearch low-level python client.

    The username and password are mandatory if the OS Cluster uses [Fine Grained Access Control]\
(https://docs.aws.amazon.com/opensearch-service/latest/developerguide/fgac.html).
    If fine grained access control is disabled, session access key and secret keys are used.

    Parameters
    ----------
    host : str
        Amazon OpenSearch domain, for example: my-test-domain.us-east-1.es.amazonaws.com.
    port : int
        OpenSearch Service only accepts connections over port 80 (HTTP) or 443 (HTTPS)
    boto3_session : boto3.Session(), optional
        Boto3 Session. A default boto3 Session is created at call time if boto3_session is None.
        Only used when ``username`` and ``password`` are not both provided.
    region : str, optional
        AWS region of the Amazon OS domain. If not provided will be extracted from boto3_session.
    username : str, optional
        Fine-grained access control username. Mandatory if OS Cluster uses Fine Grained Access Control.
    password : str, optional
        Fine-grained access control password. Mandatory if OS Cluster uses Fine Grained Access Control.

    Returns
    -------
    opensearchpy.OpenSearch
        OpenSearch low-level client.
        https://github.com/opensearch-project/opensearch-py/blob/main/opensearchpy/client/__init__.py

    Raises
    ------
    ValueError
        If ``port`` is not 80 or 443.
    exceptions.InvalidArgument
        If IAM authentication is used and the session has no access key / secret key.
    """
    valid_ports = {80, 443}

    if port not in valid_ports:
        raise ValueError(f"results: port must be one of {valid_ports}")

    if username and password:
        http_auth = (username, password)
    else:
        # Create the default session lazily: a `boto3.Session()` default argument would be
        # evaluated once at import time and then shared by every call.
        if boto3_session is None:
            boto3_session = boto3.Session()
        if region is None:
            region = _utils.get_region_from_session(boto3_session=boto3_session)
        creds = _utils.get_credentials_from_session(boto3_session=boto3_session)
        if creds.access_key is None or creds.secret_key is None:
            raise exceptions.InvalidArgument(
                "One of IAM Role or AWS ACCESS_KEY_ID and SECRET_ACCESS_KEY must be "
                "given. Unable to find ACCESS_KEY_ID and SECRET_ACCESS_KEY in boto3 "
                "session."
            )
        http_auth = AWS4Auth(creds.access_key, creds.secret_key, region, "es", session_token=creds.token)
    try:
        # NOTE(review): use_ssl is True even when port=80 is requested - confirm this is intended.
        es = OpenSearch(
            host=_strip_endpoint(host),
            port=port,
            http_auth=http_auth,
            use_ssl=True,
            verify_certs=True,
            connection_class=RequestsHttpConnection,
            timeout=30,
            max_retries=10,
            retry_on_timeout=True,
        )
    except Exception:
        _logger.error("Error connecting to Opensearch cluster. Please verify authentication details")
        raise
    return es
def _df_doc_generator(df: pd.DataFrame) -> Generator[Dict[str, Any], None, None]:
    """Yield one document per DataFrame row.

    Missing scalar values (``None``/``NaN``/``NaT``) are dropped from the document.
    String cells are stripped, and cells that look like a JSON object or array are
    parsed: first as JSON, then as a Python literal (to accept single-quoted
    properties). A cell that cannot be parsed into a ``dict`` or ``list`` is kept
    as the stripped string and a warning is logged.
    """

    def _deserialize(v: Any) -> Any:
        if not isinstance(v, str):
            return v
        text = v.strip()
        looks_like_json = (text.startswith("{") and text.endswith("}")) or (
            text.startswith("[") and text.endswith("]")
        )
        if not looks_like_json:
            return text
        try:
            return json.loads(text)
        except json.decoder.JSONDecodeError:
            pass
        try:
            parsed = ast.literal_eval(text)  # if properties are enclosed with single quotes
        except (ValueError, TypeError, SyntaxError) as e:
            # literal_eval raises ValueError/TypeError (not only SyntaxError) on malformed input
            _logger.warning("could not convert string to json: %s", text)
            _logger.warning(e)
            return text
        if isinstance(parsed, (dict, list)):
            return parsed
        # e.g. a set literal: valid Python but not a JSON document, keep the raw text
        _logger.warning("could not convert string to json: %s", text)
        return text

    def _is_present(v: Any) -> bool:
        # notna() on a list-like cell returns an array, whose truth value is ambiguous;
        # only scalars can be "missing".
        return not pd.api.types.is_scalar(v) or bool(pd.notna(v))

    for _, document in df.iterrows():
        yield {k: _deserialize(v) for k, v in document.items() if _is_present(v)}


def _file_line_generator(path: str, is_json: bool = False) -> Generator[Any, None, None]:
    """Yield the lines of a local text file, optionally parsed as JSON Lines.

    Parameters
    ----------
    path : str
        Local path of the file. It is read as UTF-8.
    is_json : bool
        If True each non-blank line is parsed with ``json.loads`` and blank lines are
        skipped; otherwise every line is yielded stripped of surrounding whitespace.
    """
    with open(path, encoding="utf-8") as fp:
        for line in fp:
            if not is_json:
                yield line.strip()
            elif line.strip():
                yield json.loads(line)
def create_index(
    client: OpenSearch,
    index: str,
    doc_type: Optional[str] = None,
    settings: Optional[Dict[str, Any]] = None,
    mappings: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Create an index.

    Parameters
    ----------
    client : OpenSearch
        instance of opensearchpy.OpenSearch to use.
    index : str
        Name of the index.
    doc_type : str, optional
        Name of the document type (for Elasticsearch versions 5.x and earlier).
    settings : Dict[str, Any], optional
        Index settings
        https://opensearch.org/docs/opensearch/rest-api/create-index/#index-settings
    mappings : Dict[str, Any], optional
        Index mappings
        https://opensearch.org/docs/opensearch/rest-api/create-index/#mappings

    Returns
    -------
    Dict[str, Any]
        OpenSearch rest api response
        https://opensearch.org/docs/opensearch/rest-api/create-index/#response.
        A 400 response (e.g. the index already exists) is returned, not raised.

    Raises
    ------
    ValueError
        If the cluster reports a mapper parsing error for the given mappings.

    Examples
    --------
    Creating an index.

    >>> import awswrangler as wr
    >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')
    >>> response = wr.opensearch.create_index(
    ...     client=client,
    ...     index="sample-index1",
    ...     mappings={
    ...        "properties": {
    ...          "age":    { "type" : "integer" }
    ...        }
    ...     },
    ...     settings={
    ...        "index": {
    ...          "number_of_shards": 2,
    ...          "number_of_replicas": 1
    ...         }
    ...     }
    ... )

    """
    body: Dict[str, Any] = {}
    if mappings:
        if _get_distribution(client) == "opensearch":
            typeless = True
        else:
            major = _get_version_major(client)
            # An unknown version is treated as a current (typeless) cluster instead of
            # failing on the `None >= 7` comparison.
            typeless = major is None or major >= 7
        if typeless:
            body["mappings"] = mappings  # doc type deprecated
        elif doc_type:
            body["mappings"] = {doc_type: mappings}
        else:
            body["mappings"] = {index: mappings}
    if settings:
        body["settings"] = settings

    # ignore 400 cause by IndexAlreadyExistsException when creating an index
    response: Dict[str, Any] = client.indices.create(index, body=body or None, ignore=400)
    if "error" in response:
        _logger.warning(response)
        error = response["error"]
        # Older clusters report the error as a string, newer ones as a dict with a "type" key.
        error_type = error.get("type", "") if isinstance(error, dict) else ""
        if str(error).startswith("MapperParsingException") or error_type == "mapper_parsing_exception":
            raise ValueError(error)
    return response


def delete_index(client: OpenSearch, index: str) -> Dict[str, Any]:
    """Delete an index.

    Parameters
    ----------
    client : OpenSearch
        instance of opensearchpy.OpenSearch to use.
    index : str
        Name of the index.

    Returns
    -------
    Dict[str, Any]
        OpenSearch rest api response. 400/404 responses (e.g. the index does not exist)
        are returned, not raised.

    Examples
    --------
    Deleting an index.

    >>> import awswrangler as wr
    >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')
    >>> response = wr.opensearch.delete_index(
    ...     client=client,
    ...     index="sample-index1"
    ... )

    """
    # ignore 400/404 IndexNotFoundError exception
    response: Dict[str, Any] = client.indices.delete(index, ignore=[400, 404])
    if "error" in response:
        _logger.warning(response)
    return response
def index_json(
    client: OpenSearch,
    path: str,
    index: str,
    doc_type: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
    json_path: Optional[str] = None,
    **kwargs: Any,
) -> Dict[str, Any]:
    """Index all documents from JSON file to OpenSearch index.

    The JSON file should be in a JSON-Lines text format (newline-delimited JSON) - https://jsonlines.org/
    OR if it is a single large JSON document please provide `json_path`.

    Parameters
    ----------
    client : OpenSearch
        instance of opensearchpy.OpenSearch to use.
    path : str
        s3 or local path to the JSON file which contains the documents.
    index : str
        Name of the index.
    doc_type : str, optional
        Name of the document type (for Elasticsearch versions 5.x and earlier).
    json_path : str, optional
        JsonPath expression to specify explicit path to a single name element
        in a JSON hierarchical data structure.
        Read more about [JsonPath](https://jsonpath.com)
    boto3_session : boto3.Session(), optional
        Boto3 Session to be used to access s3 if s3 path is provided.
        A default boto3 Session is created at call time if boto3_session is None.
    **kwargs :
        KEYWORD arguments forwarded to :func:`~awswrangler.opensearch.index_documents`
        which is used to execute the operation

    Returns
    -------
    Dict[str, Any]
        Response payload
        https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#response.

    Examples
    --------
    Writing contents of JSON file

    >>> import awswrangler as wr
    >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')
    >>> wr.opensearch.index_json(
    ...     client=client,
    ...     path='docs.json',
    ...     index='sample-index1'
    ... )
    """
    _logger.debug("indexing %s from %s", index, path)

    if path.startswith("s3://"):
        # The session is only needed for S3 and is created lazily: a `boto3.Session()`
        # default argument would be evaluated once at import time.
        session = boto3_session if boto3_session is not None else boto3.Session()
        bucket, key = parse_path(path)
        s3 = session.client("s3")
        obj = s3.get_object(Bucket=bucket, Key=key)
        body = obj["Body"].read()
        # skip blank lines (e.g. a trailing newline) instead of failing to decode them
        documents = [json.loads(line) for line in body.splitlines() if line.strip()]
    else:  # local path
        documents = list(_file_line_generator(path, is_json=True))
    if json_path:
        documents = _get_documents_w_json_path(documents, json_path)
    return index_documents(client=client, documents=documents, index=index, doc_type=doc_type, **kwargs)


def index_csv(
    client: OpenSearch,
    path: str,
    index: str,
    doc_type: Optional[str] = None,
    pandas_kwargs: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> Dict[str, Any]:
    """Index all documents from a CSV file to OpenSearch index.

    Parameters
    ----------
    client : OpenSearch
        instance of opensearchpy.OpenSearch to use.
    path : str
        s3 or local path to the CSV file which contains the documents.
    index : str
        Name of the index.
    doc_type : str, optional
        Name of the document type (for Elasticsearch versions 5.x and earlier).
    pandas_kwargs : Dict[str, Any], optional
        Dictionary of arguments forwarded to pandas.read_csv().
        e.g. pandas_kwargs={'sep': '|', 'na_values': ['null', 'none']}
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
        Note: these params values are enforced: `skip_blank_lines=True`
        The dictionary passed by the caller is not modified.
    **kwargs :
        KEYWORD arguments forwarded to :func:`~awswrangler.opensearch.index_documents`
        which is used to execute the operation

    Returns
    -------
    Dict[str, Any]
        Response payload
        https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#response.

    Examples
    --------
    Writing contents of CSV file

    >>> import awswrangler as wr
    >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')
    >>> wr.opensearch.index_csv(
    ...     client=client,
    ...     path='docs.csv',
    ...     index='sample-index1'
    ... )

    Writing contents of CSV file using pandas_kwargs

    >>> import awswrangler as wr
    >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')
    >>> wr.opensearch.index_csv(
    ...     client=client,
    ...     path='docs.csv',
    ...     index='sample-index1',
    ...     pandas_kwargs={'sep': '|', 'na_values': ['null', 'none']}
    ... )
    """
    _logger.debug("indexing %s from %s", index, path)
    enforced_pandas_params = {
        "skip_blank_lines": True,
        # 'na_filter': True  # will generate Nan value for empty cells. We remove Nan keys in _df_doc_generator
        # Note: if the user will pass na_filter=False null fields will be indexed as well ({"k1": null, "k2": null})
    }
    # Merge into a new dict so the caller's pandas_kwargs is left untouched.
    read_csv_kwargs = {**(pandas_kwargs or {}), **enforced_pandas_params}
    df = pd.read_csv(path, **read_csv_kwargs)
    return index_df(client, df=df, index=index, doc_type=doc_type, **kwargs)
def index_documents(
    client: OpenSearch,
    documents: Iterable[Mapping[str, Any]],
    index: str,
    doc_type: Optional[str] = None,
    keys_to_write: Optional[List[str]] = None,
    id_keys: Optional[List[str]] = None,
    ignore_status: Optional[Union[List[Any], Tuple[Any]]] = None,
    bulk_size: int = 1000,
    chunk_size: Optional[int] = 500,
    max_chunk_bytes: Optional[int] = 100 * 1024 * 1024,
    max_retries: Optional[int] = 5,
    initial_backoff: Optional[int] = 2,
    max_backoff: Optional[int] = 600,
    **kwargs: Any,
) -> Dict[str, Any]:
    """Index all documents to OpenSearch index.

    Note
    ----
    Some of the args are referenced from opensearch-py client library (bulk helpers)
    https://opensearch-py.readthedocs.io/en/latest/helpers.html#opensearchpy.helpers.bulk
    https://opensearch-py.readthedocs.io/en/latest/helpers.html#opensearchpy.helpers.streaming_bulk

    If you receive `Error 429 (Too Many Requests) /_bulk` please try to decrease `bulk_size` value.
    Please also consider modifying the cluster size and instance type -
    Read more here: https://aws.amazon.com/premiumsupport/knowledge-center/resolve-429-error-es/

    Parameters
    ----------
    client : OpenSearch
        instance of opensearchpy.OpenSearch to use.
    documents : Iterable[Mapping[str, Any]]
        List which contains the documents that will be inserted.
        Any other iterable is materialized into a list first.
    index : str
        Name of the index.
    doc_type : str, optional
        Name of the document type (for Elasticsearch versions 5.x and earlier).
    keys_to_write : List[str], optional
        list of keys to index. If not provided all keys will be indexed
    id_keys : List[str], optional
        list of keys that compound document unique id. If not provided will use `_id` key if exists,
        otherwise will generate unique identifier for each document.
    ignore_status:  Union[List[Any], Tuple[Any]], optional
        list of HTTP status codes that you want to ignore (not raising an exception)
    bulk_size: int,
        number of docs in each _bulk request (default: 1000)
    chunk_size : int, optional
        number of docs in one chunk sent to es (default: 500)
    max_chunk_bytes: int, optional
        the maximum size of the request in bytes (default: 100MB)
    max_retries : int, optional
        maximum number of times a document will be retried when
        ``429`` is received, set to 0 for no retries on ``429`` (default: 5)
    initial_backoff : int, optional
        number of seconds we should wait before the first retry.
        Any subsequent retries will be powers of ``initial_backoff*2**retry_number`` (default: 2)
    max_backoff: int, optional
        maximum number of seconds a retry will wait (default: 600)
    **kwargs :
        KEYWORD arguments forwarded to bulk operation
        (``request_timeout`` defaults to 30 seconds when not given)
        elasticsearch >= 7.10.2 / opensearch: \
https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#url-parameters
        elasticsearch < 7.10.2: \
https://opendistro.github.io/for-elasticsearch-docs/docs/elasticsearch/rest-api-reference/#url-parameters

    Returns
    -------
    Dict[str, Any]
        ``{"success": <number of indexed documents>, "errors": <list of bulk errors>}``
        https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#response.

    Examples
    --------
    Writing documents

    >>> import awswrangler as wr
    >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')
    >>> wr.opensearch.index_documents(
    ...     client=client,
    ...     documents=[{'_id': '1', 'value': 'foo'}, {'_id': '2', 'value': 'bar'}],
    ...     index='sample-index1'
    ... )
    """
    if not isinstance(documents, list):
        documents = list(documents)  # the total is needed for logging and the progress bar
    total_documents = len(documents)
    _logger.debug("indexing %s documents into %s", total_documents, index)

    actions = _actions_generator(
        documents, index, doc_type, keys_to_write=keys_to_write, id_keys=id_keys, bulk_size=bulk_size
    )

    # Default timeout; a caller-supplied request_timeout no longer collides with it.
    kwargs.setdefault("request_timeout", 30)

    success = 0
    errors: List[Any] = []
    refresh_interval = None
    refresh_disabled = False
    try:
        widgets = [
            progressbar.Percentage(),
            progressbar.SimpleProgress(format=" (%(value_s)s/%(max_value_s)s)"),
            progressbar.Bar(),
            progressbar.Timer(),
        ]
        progress_bar = progressbar.ProgressBar(widgets=widgets, max_value=total_documents, prefix="Indexing: ").start()
        for i, bulk_chunk_documents in enumerate(actions):
            if i == 1:  # second bulk iteration, in case the index didn't exist before
                refresh_interval = _get_refresh_interval(client, index)
                _disable_refresh_interval(client, index)
                refresh_disabled = True
            _logger.debug("running bulk index of %s documents", len(bulk_chunk_documents))
            _success, _errors = bulk(
                client=client,
                actions=bulk_chunk_documents,
                ignore_status=ignore_status,
                chunk_size=chunk_size,
                max_chunk_bytes=max_chunk_bytes,
                max_retries=max_retries,
                initial_backoff=initial_backoff,
                max_backoff=max_backoff,
                **kwargs,
            )
            success += _success
            errors += _errors  # type: ignore
            _logger.debug("indexed %s documents (%s/%s)", _success, success, total_documents)
            progress_bar.update(success, force=True)
    except TransportError as e:
        if str(e.status_code) == "429":  # Too Many Requests
            _logger.error(
                "Error 429 (Too Many Requests): "
                "Try to tune bulk_size parameter. "
                "Read more here: https://aws.amazon.com/premiumsupport/knowledge-center/resolve-429-error-es"
            )
        raise
    finally:
        # Restore the setting only if this call changed it. Writing it back unconditionally
        # would reset a custom refresh_interval after a single-bulk run and would raise on a
        # missing index, hiding the original error.
        if refresh_disabled:
            _set_refresh_interval(client, index, refresh_interval)

    return {"success": success, "errors": errors}
+[[package]] +name = "progressbar2" +version = "3.53.3" +description = "A Python Progressbar library to provide visual (yet text based) progress to long running operations." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +python-utils = ">=2.3.0" +six = "*" + +[package.extras] +docs = ["sphinx (>=1.7.4)"] +tests = ["flake8 (>=3.7.7)", "pytest (>=4.6.9)", "pytest-cov (>=2.6.1)", "freezegun (>=0.3.11)", "sphinx (>=1.8.5)"] + [[package]] name = "prometheus-client" version = "0.11.0" @@ -1551,6 +1605,17 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "python-utils" +version = "2.5.6" +description = "Python Utils is a module with some convenient utilities not included with the standard Python install" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = "*" + [[package]] name = "pytz" version = "2021.3" @@ -1634,6 +1699,18 @@ urllib3 = ">=1.21.1,<1.27" socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"] +[[package]] +name = "requests-aws4auth" +version = "1.1.1" +description = "AWS4 authentication for Requests" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +requests = "*" +six = "*" + [[package]] name = "requests-unixsocket" version = "0.2.0" @@ -2529,6 +2606,11 @@ json5 = [ {file = "json5-0.9.6-py2.py3-none-any.whl", hash = "sha256:823e510eb355949bed817e1f3e2d682455dc6af9daf6066d5698d6a2ca4481c2"}, {file = "json5-0.9.6.tar.gz", hash = "sha256:9175ad1bc248e22bb8d95a8e8d765958bf0008fef2fe8abab5bc04e0f1ac8302"}, ] +jsonpath-ng = [ + {file = "jsonpath-ng-1.5.3.tar.gz", hash = "sha256:a273b182a82c1256daab86a313b937059261b5c5f8c4fa3fc38b882b344dd567"}, + {file = "jsonpath_ng-1.5.3-py2-none-any.whl", hash = "sha256:f75b95dbecb8a0f3b86fd2ead21c2b022c3f5770957492b9b6196ecccfeb10aa"}, + {file = "jsonpath_ng-1.5.3-py3-none-any.whl", hash = 
"sha256:292a93569d74029ba75ac2dc3d3630fc0e17b2df26119a165fa1d498ca47bf65"}, +] jsonschema = [ {file = "jsonschema-4.0.0-py3-none-any.whl", hash = "sha256:c773028c649441ab980015b5b622f4cd5134cf563daaf0235ca4b73cc3734f20"}, {file = "jsonschema-4.0.0.tar.gz", hash = "sha256:bc51325b929171791c42ebc1c70b9713eb134d3bb8ebd5474c8b659b15be6d86"}, @@ -2856,6 +2938,10 @@ openpyxl = [ {file = "openpyxl-3.0.9-py2.py3-none-any.whl", hash = "sha256:8f3b11bd896a95468a4ab162fc4fcd260d46157155d1f8bfaabb99d88cfcf79f"}, {file = "openpyxl-3.0.9.tar.gz", hash = "sha256:40f568b9829bf9e446acfffce30250ac1fa39035124d55fc024025c41481c90f"}, ] +opensearch-py = [ + {file = "opensearch-py-1.0.0.tar.gz", hash = "sha256:fa952836cabfa1b2fb05f852edc1a373342494345e89fd52b7124daf4d296bb4"}, + {file = "opensearch_py-1.0.0-py2.py3-none-any.whl", hash = "sha256:17afebc25dc890b96c4e9ec8692dcfdb6842c028ce8c2d252e8f55c587960177"}, +] packaging = [ {file = "packaging-21.0-py3-none-any.whl", hash = "sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14"}, {file = "packaging-21.0.tar.gz", hash = "sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7"}, @@ -2943,6 +3029,14 @@ pluggy = [ {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, ] +ply = [ + {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, + {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, +] +progressbar2 = [ + {file = "progressbar2-3.53.3-py2.py3-none-any.whl", hash = "sha256:6610fe393a4591967ecf9062d42c0663c8862092245c490e5971ec5f348755ca"}, + {file = "progressbar2-3.53.3.tar.gz", hash = "sha256:f4e1c2d48e608850c59f793d6e74ccdebbcbaac7ffe917d45e9646ec0d664d6d"}, +] prometheus-client = [ {file 
= "prometheus_client-0.11.0-py2.py3-none-any.whl", hash = "sha256:b014bc76815eb1399da8ce5fc84b7717a3e63652b0c0f8804092c9363acab1b2"}, {file = "prometheus_client-0.11.0.tar.gz", hash = "sha256:3a8baade6cb80bcfe43297e33e7623f3118d660d41387593758e2fb1ea173a86"}, @@ -3094,6 +3188,10 @@ python-dateutil = [ python-levenshtein = [ {file = "python-Levenshtein-0.12.2.tar.gz", hash = "sha256:dc2395fbd148a1ab31090dd113c366695934b9e85fe5a4b2a032745efd0346f6"}, ] +python-utils = [ + {file = "python-utils-2.5.6.tar.gz", hash = "sha256:352d5b1febeebf9b3cdb9f3c87a3b26ef22d3c9e274a8ec1e7048ecd2fac4349"}, + {file = "python_utils-2.5.6-py2.py3-none-any.whl", hash = "sha256:18fbc1a1df9a9061e3059a48ebe5c8a66b654d688b0e3ecca8b339a7f168f208"}, +] pytz = [ {file = "pytz-2021.3-py2.py3-none-any.whl", hash = "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c"}, {file = "pytz-2021.3.tar.gz", hash = "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"}, @@ -3206,6 +3304,10 @@ requests = [ {file = "requests-2.26.0-py2.py3-none-any.whl", hash = "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24"}, {file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"}, ] +requests-aws4auth = [ + {file = "requests-aws4auth-1.1.1.tar.gz", hash = "sha256:c0883346ce30b5018903a67da88df72f73ff06e1a320845bba9cd85e811ba0ba"}, + {file = "requests_aws4auth-1.1.1-py2.py3-none-any.whl", hash = "sha256:dfd9f930ffde48a756b72b55698a8522875ea6358dcffbcc44a66700ace31783"}, +] requests-unixsocket = [ {file = "requests-unixsocket-0.2.0.tar.gz", hash = "sha256:9e5c1a20afc3cf786197ae59c79bcdb0e7565f218f27df5f891307ee8817c1ea"}, {file = "requests_unixsocket-0.2.0-py2.py3-none-any.whl", hash = "sha256:014d07bfb66dc805a011a8b4b306cf4ec96d2eddb589f6b2b5765e626f0dc0cc"}, diff --git a/pyproject.toml b/pyproject.toml index 36a4df379..23a635344 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,10 @@ 
pyodbc = { version = "~4.0.32", optional = true } sphinx-bootstrap-theme = "^0.8.0" Sphinx = "^4.2.0" tox = "^3.24.4" +requests-aws4auth = "^1.1.1" +jsonpath-ng = "^1.5.3" +progressbar2 = "^3.53.3" +opensearch-py = "^1.0.0" [tool.poetry.extras] diff --git a/test_infra/app.py b/test_infra/app.py index 4e27aa261..8c3395e22 100644 --- a/test_infra/app.py +++ b/test_infra/app.py @@ -2,6 +2,7 @@ from aws_cdk import core as cdk from stacks.base_stack import BaseStack from stacks.databases_stack import DatabasesStack +from stacks.opensearch_stack import OpenSearchStack app = cdk.App() @@ -14,4 +15,12 @@ base.get_key, ) +OpenSearchStack( + app, + "aws-data-wrangler-opensearch", + base.get_vpc, + base.get_bucket, + base.get_key, +) + app.synth() diff --git a/test_infra/poetry.lock b/test_infra/poetry.lock index f68d38031..aa17ff35f 100644 --- a/test_infra/poetry.lock +++ b/test_infra/poetry.lock @@ -1,496 +1,638 @@ [[package]] name = "attrs" -version = "20.3.0" +version = "21.2.0" description = "Classes Without Boilerplate" category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" [package.extras] -dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface", "furo", "sphinx", "pre-commit"] -docs = ["furo", "sphinx", "zope.interface"] -tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface"] -tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six"] +dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit"] +docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] +tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", 
"zope.interface"] +tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins"] [[package]] name = "aws-cdk.assets" -version = "1.115.0" +version = "1.124.0" description = "This module is deprecated. All types are now available under the core module" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-applicationautoscaling" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::ApplicationAutoScaling" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-autoscaling-common" = "1.115.0" -"aws-cdk.aws-cloudwatch" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-autoscaling-common" = "1.124.0" +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-autoscaling-common" -version = "1.115.0" +version = "1.124.0" description = "Common implementation package for @aws-cdk/aws-autoscaling and @aws-cdk/aws-applicationautoscaling" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" +publication = ">=0.0.3" + +[[package]] +name = "aws-cdk.aws-certificatemanager" +version = "1.124.0" +description = "The CDK Construct Library for AWS::CertificateManager" +category = "main" +optional = false +python-versions = 
">=3.6" + +[package.dependencies] +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-lambda" = "1.124.0" +"aws-cdk.aws-route53" = "1.124.0" +"aws-cdk.core" = "1.124.0" +constructs = ">=3.3.69,<4.0.0" +jsii = ">=1.34.0,<2.0.0" +publication = ">=0.0.3" + +[[package]] +name = "aws-cdk.aws-cloudformation" +version = "1.124.0" +description = "The CDK Construct Library for AWS::CloudFormation" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-lambda" = "1.124.0" +"aws-cdk.aws-s3" = "1.124.0" +"aws-cdk.aws-sns" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" +constructs = ">=3.3.69,<4.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-cloudwatch" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::CloudWatch" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-codeguruprofiler" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::CodeGuruProfiler" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" +publication = ">=0.0.3" + +[[package]] +name = "aws-cdk.aws-codestarnotifications" +version = "1.124.0" +description = "The CDK Construct Library for AWS::CodeStarNotifications" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +"aws-cdk.core" = "1.124.0" +constructs = 
">=3.3.69,<4.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-ec2" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::EC2" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-cloudwatch" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.aws-logs" = "1.115.0" -"aws-cdk.aws-s3" = "1.115.0" -"aws-cdk.aws-s3-assets" = "1.115.0" -"aws-cdk.aws-ssm" = "1.115.0" -"aws-cdk.cloud-assembly-schema" = "1.115.0" -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" -"aws-cdk.region-info" = "1.115.0" +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-logs" = "1.124.0" +"aws-cdk.aws-s3" = "1.124.0" +"aws-cdk.aws-s3-assets" = "1.124.0" +"aws-cdk.aws-ssm" = "1.124.0" +"aws-cdk.cloud-assembly-schema" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" +"aws-cdk.region-info" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-ecr" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::ECR" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-events" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-events" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-ecr-assets" -version = "1.115.0" +version = "1.124.0" description = "Docker image assets deployed to ECR" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.assets" = "1.115.0" -"aws-cdk.aws-ecr" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-s3" = "1.115.0" -"aws-cdk.core" 
= "1.115.0" -"aws-cdk.cx-api" = "1.115.0" +"aws-cdk.assets" = "1.124.0" +"aws-cdk.aws-ecr" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-s3" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-efs" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::EFS" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-ec2" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.cloud-assembly-schema" = "1.115.0" -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.cloud-assembly-schema" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-events" -version = "1.115.0" +version = "1.124.0" description = "Amazon EventBridge Construct Library" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-glue" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::Glue" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-ec2" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.aws-s3" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.assets" = "1.124.0" +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-events" = "1.124.0" 
+"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-logs" = "1.124.0" +"aws-cdk.aws-s3" = "1.124.0" +"aws-cdk.aws-s3-assets" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-iam" -version = "1.115.0" +version = "1.124.0" description = "CDK routines for easily assigning correct and minimal IAM permissions" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.core" = "1.115.0" -"aws-cdk.region-info" = "1.115.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.region-info" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-kms" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::KMS" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.cloud-assembly-schema" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-lambda" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::Lambda" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-applicationautoscaling" = "1.115.0" -"aws-cdk.aws-cloudwatch" = "1.115.0" -"aws-cdk.aws-codeguruprofiler" = "1.115.0" -"aws-cdk.aws-ec2" = "1.115.0" -"aws-cdk.aws-ecr" = "1.115.0" -"aws-cdk.aws-ecr-assets" = "1.115.0" -"aws-cdk.aws-efs" = "1.115.0" -"aws-cdk.aws-events" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.aws-logs" = "1.115.0" -"aws-cdk.aws-s3" = "1.115.0" -"aws-cdk.aws-s3-assets" = 
"1.115.0" -"aws-cdk.aws-signer" = "1.115.0" -"aws-cdk.aws-sqs" = "1.115.0" -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" +"aws-cdk.aws-applicationautoscaling" = "1.124.0" +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-codeguruprofiler" = "1.124.0" +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-ecr" = "1.124.0" +"aws-cdk.aws-ecr-assets" = "1.124.0" +"aws-cdk.aws-efs" = "1.124.0" +"aws-cdk.aws-events" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-logs" = "1.124.0" +"aws-cdk.aws-s3" = "1.124.0" +"aws-cdk.aws-s3-assets" = "1.124.0" +"aws-cdk.aws-signer" = "1.124.0" +"aws-cdk.aws-sqs" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" +"aws-cdk.region-info" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-logs" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::Logs" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-cloudwatch" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.aws-s3-assets" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-s3-assets" = "1.124.0" +"aws-cdk.core" = "1.124.0" +constructs = ">=3.3.69,<4.0.0" +jsii = ">=1.34.0,<2.0.0" +publication = ">=0.0.3" + +[[package]] +name = "aws-cdk.aws-opensearchservice" +version = "1.124.0" +description = "The CDK Construct Library for AWS::OpenSearchService" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +"aws-cdk.aws-certificatemanager" = "1.124.0" +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-logs" = "1.124.0" +"aws-cdk.aws-route53" = "1.124.0" +"aws-cdk.aws-secretsmanager" 
= "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.custom-resources" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-rds" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::RDS" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-cloudwatch" = "1.115.0" -"aws-cdk.aws-ec2" = "1.115.0" -"aws-cdk.aws-events" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.aws-logs" = "1.115.0" -"aws-cdk.aws-s3" = "1.115.0" -"aws-cdk.aws-secretsmanager" = "1.115.0" -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-events" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-logs" = "1.124.0" +"aws-cdk.aws-s3" = "1.124.0" +"aws-cdk.aws-secretsmanager" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-redshift" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::Redshift" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-ec2" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.aws-s3" = "1.115.0" -"aws-cdk.aws-secretsmanager" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-lambda" = "1.124.0" +"aws-cdk.aws-s3" = "1.124.0" +"aws-cdk.aws-secretsmanager" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.custom-resources" = "1.124.0" +constructs = ">=3.3.69,<4.0.0" +jsii = ">=1.34.0,<2.0.0" +publication = ">=0.0.3" + +[[package]] +name = "aws-cdk.aws-route53" +version = 
"1.124.0" +description = "The CDK Construct Library for AWS::Route53" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-logs" = "1.124.0" +"aws-cdk.cloud-assembly-schema" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.custom-resources" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-s3" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::S3" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-events" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" +"aws-cdk.aws-events" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-s3-assets" -version = "1.115.0" +version = "1.124.0" description = "Deploy local files and directories to S3" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.assets" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.aws-s3" = "1.115.0" -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" +"aws-cdk.assets" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-s3" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-sam" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for the AWS Serverless Application Model (SAM) resources" category = "main" 
optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.core" = "1.115.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-secretsmanager" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::SecretsManager" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-ec2" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.aws-lambda" = "1.115.0" -"aws-cdk.aws-sam" = "1.115.0" -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-lambda" = "1.124.0" +"aws-cdk.aws-sam" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-signer" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::Signer" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.core" = "1.115.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" +publication = ">=0.0.3" + +[[package]] +name = "aws-cdk.aws-sns" +version = "1.124.0" +description = "The CDK Construct Library for AWS::SNS" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-codestarnotifications" = "1.124.0" +"aws-cdk.aws-events" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-sqs" = "1.124.0" +"aws-cdk.core" = "1.124.0" +constructs = ">=3.3.69,<4.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-sqs" -version = "1.115.0" +version = 
"1.124.0" description = "The CDK Construct Library for AWS::SQS" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-cloudwatch" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-ssm" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::SSM" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.cloud-assembly-schema" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.cloud-assembly-schema" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.cloud-assembly-schema" -version = "1.115.0" +version = "1.124.0" description = "Cloud Assembly Schema" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.core" -version = "1.115.0" +version = "1.124.0" description = "AWS Cloud Development Kit Core Library" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.cloud-assembly-schema" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" -"aws-cdk.region-info" = "1.115.0" +"aws-cdk.cloud-assembly-schema" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" +"aws-cdk.region-info" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" +publication = ">=0.0.3" + +[[package]] +name = "aws-cdk.custom-resources" +version 
= "1.124.0" +description = "Constructs for implementing CDK custom resources" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +"aws-cdk.aws-cloudformation" = "1.124.0" +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-lambda" = "1.124.0" +"aws-cdk.aws-logs" = "1.124.0" +"aws-cdk.aws-sns" = "1.124.0" +"aws-cdk.core" = "1.124.0" +constructs = ">=3.3.69,<4.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.cx-api" -version = "1.115.0" +version = "1.124.0" description = "Cloud executable protocol" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.cloud-assembly-schema" = "1.115.0" -jsii = ">=1.31.0,<2.0.0" +"aws-cdk.cloud-assembly-schema" = "1.124.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.region-info" -version = "1.115.0" +version = "1.124.0" description = "AWS region information, such as service principal names" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] @@ -509,14 +651,14 @@ dev = ["bumpversion", "wheel", "watchdog", "flake8", "tox", "coverage", "sphinx" [[package]] name = "cattrs" -version = "1.6.0" +version = "1.8.0" description = "Composable complex class support for attrs and dataclasses." 
category = "main" optional = false python-versions = ">=3.7,<4.0" [package.dependencies] -attrs = "*" +attrs = ">=20" [[package]] name = "constructs" @@ -547,17 +689,17 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes [[package]] name = "jsii" -version = "1.32.0" +version = "1.34.0" description = "Python client for jsii runtime" category = "main" optional = false python-versions = "~=3.6" [package.dependencies] -attrs = ">=20.1,<21.0" +attrs = ">=21.2,<22.0" cattrs = [ {version = ">=1.0.0,<1.1.0", markers = "python_version < \"3.7\""}, - {version = ">=1.6.0,<1.7.0", markers = "python_version >= \"3.7\""}, + {version = ">=1.8.0,<1.9.0", markers = "python_version >= \"3.7\""}, ] importlib-resources = {version = "*", markers = "python_version < \"3.7\""} python-dateutil = "*" @@ -613,130 +755,158 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes [metadata] lock-version = "1.1" python-versions = ">=3.6.2, <3.10" -content-hash = "6f8430d31b5e3d08bb0393b4c93ca223cc9d49b55bb3045f95326770d74347ca" +content-hash = "6d95fccb052c85375178aa3ade72de9e4ee87c009d7e067dd7d4120c23ded9f5" [metadata.files] attrs = [ - {file = "attrs-20.3.0-py2.py3-none-any.whl", hash = "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6"}, - {file = "attrs-20.3.0.tar.gz", hash = "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700"}, + {file = "attrs-21.2.0-py2.py3-none-any.whl", hash = "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1"}, + {file = "attrs-21.2.0.tar.gz", hash = "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb"}, ] "aws-cdk.assets" = [ - {file = "aws-cdk.assets-1.115.0.tar.gz", hash = "sha256:e3a569f900451f2f8429a2ad7cd059712f2903d24cbcaa023911f46362496d2d"}, - {file = "aws_cdk.assets-1.115.0-py3-none-any.whl", hash = "sha256:d7f62fdaf500980cbcb0cab82cd08cb7334683428cfb3c67c68f72371e29109f"}, + {file = 
"aws-cdk.assets-1.124.0.tar.gz", hash = "sha256:8097177806b29824a69bbdb5df9ec74f7b360708b51ed860613d38e30414054a"}, + {file = "aws_cdk.assets-1.124.0-py3-none-any.whl", hash = "sha256:c94b63e36c094111c6a9abb2a9d6c694f3e123034cf5dc23e5293fdc32c44fb3"}, ] "aws-cdk.aws-applicationautoscaling" = [ - {file = "aws-cdk.aws-applicationautoscaling-1.115.0.tar.gz", hash = "sha256:e174b3247252bfec419389b896267516d2f874ec56456880116f79204ae9e3e5"}, - {file = "aws_cdk.aws_applicationautoscaling-1.115.0-py3-none-any.whl", hash = "sha256:45eff7fb107924b6ade243e88edae49f14a599ff3afcaf40a73969c45de733b5"}, + {file = "aws-cdk.aws-applicationautoscaling-1.124.0.tar.gz", hash = "sha256:c3bc89c2754b7ce029c667be9ab1633884bf574d33773a1dc07a3cff1b698670"}, + {file = "aws_cdk.aws_applicationautoscaling-1.124.0-py3-none-any.whl", hash = "sha256:d0dcc91b3de13ad46b874813877af3746adec3ad9f7380b2408a14cdd848b65c"}, ] "aws-cdk.aws-autoscaling-common" = [ - {file = "aws-cdk.aws-autoscaling-common-1.115.0.tar.gz", hash = "sha256:b87c84d3e558b20e3bea515d89cb59d633d71e2c8a6e4e859a691f3c06d45c10"}, - {file = "aws_cdk.aws_autoscaling_common-1.115.0-py3-none-any.whl", hash = "sha256:bc0e56fe4fedd6e5a0d094845c4e1b2681bf60dfb72f2062392ef7edd5b157bd"}, + {file = "aws-cdk.aws-autoscaling-common-1.124.0.tar.gz", hash = "sha256:03f57fcd34d9e370c0929de63c674bdbf2a8fbe2efed40942e0e2bff1ed1d436"}, + {file = "aws_cdk.aws_autoscaling_common-1.124.0-py3-none-any.whl", hash = "sha256:1969320c12bf4107346233b3310464c1e752b65a6577c865abb809711cec2c1f"}, +] +"aws-cdk.aws-certificatemanager" = [ + {file = "aws-cdk.aws-certificatemanager-1.124.0.tar.gz", hash = "sha256:291e7c29aa406619276dc141a3827b0af15c9a997b6e7dc1a8c59bbfb3aa7df7"}, + {file = "aws_cdk.aws_certificatemanager-1.124.0-py3-none-any.whl", hash = "sha256:23071000fe931dd817638b059991872fe93a91a1c1d33750f080c536e9aaf302"}, +] +"aws-cdk.aws-cloudformation" = [ + {file = "aws-cdk.aws-cloudformation-1.124.0.tar.gz", hash = 
"sha256:c38efe614113c3bdcb964f6c20742994154392bc78e82c34a299d0f1b26a7c65"}, + {file = "aws_cdk.aws_cloudformation-1.124.0-py3-none-any.whl", hash = "sha256:9b530359f567555b83dfbb99f7112fdb2ad893176032ff542ce09f7454ce5107"}, ] "aws-cdk.aws-cloudwatch" = [ - {file = "aws-cdk.aws-cloudwatch-1.115.0.tar.gz", hash = "sha256:adb27916047303bf5748d503dc608041d30ea002b47c4e2c370d2084c1bec8c4"}, - {file = "aws_cdk.aws_cloudwatch-1.115.0-py3-none-any.whl", hash = "sha256:2b6b5e954f0b2a629d977cb6db93ec38e2c3c6dde43d88369dbc7a64c92d1ce1"}, + {file = "aws-cdk.aws-cloudwatch-1.124.0.tar.gz", hash = "sha256:221734f8b6f940068714fe00fd68a8a32d767c713b2adb874365482836248f7f"}, + {file = "aws_cdk.aws_cloudwatch-1.124.0-py3-none-any.whl", hash = "sha256:a9a4abf58e31cb53872601296b41cf8e8d5106807a5775d19a6ac05fbe34bef0"}, ] "aws-cdk.aws-codeguruprofiler" = [ - {file = "aws-cdk.aws-codeguruprofiler-1.115.0.tar.gz", hash = "sha256:bd8954511616b1ae8e6bd88122de5cb94c7d16b79f051452b490af9ec729124d"}, - {file = "aws_cdk.aws_codeguruprofiler-1.115.0-py3-none-any.whl", hash = "sha256:48d6a7ea1a372e3e1dbdb0307c7665ba486ef58b80d1d2ebb56cabb03b40af80"}, + {file = "aws-cdk.aws-codeguruprofiler-1.124.0.tar.gz", hash = "sha256:e37cd801e5b7fa93a0dba84effc36cd94f090b83988c4f165815ba585f7ca866"}, + {file = "aws_cdk.aws_codeguruprofiler-1.124.0-py3-none-any.whl", hash = "sha256:4d4bd49ea2415d9daf7c3c57403060802e5f523bd476a276f1e00a3e3d73c15d"}, +] +"aws-cdk.aws-codestarnotifications" = [ + {file = "aws-cdk.aws-codestarnotifications-1.124.0.tar.gz", hash = "sha256:478486be7e24e455c1fd8a54489de491005997b6ebdc06212a6231e89471414a"}, + {file = "aws_cdk.aws_codestarnotifications-1.124.0-py3-none-any.whl", hash = "sha256:de73fbcceba282ddf3caf5e74b188e4685108cec845f573986ea3fec1c98beba"}, ] "aws-cdk.aws-ec2" = [ - {file = "aws-cdk.aws-ec2-1.115.0.tar.gz", hash = "sha256:e819f98e07d3ee24182f23d435bf164ca7bdfdd42e72305d975b2c75a5a57138"}, - {file = "aws_cdk.aws_ec2-1.115.0-py3-none-any.whl", hash = 
"sha256:0475af1a07e514136004870c590dd5b187dd4588eb291da4662ed2d7cf5956c7"}, + {file = "aws-cdk.aws-ec2-1.124.0.tar.gz", hash = "sha256:f7515734cac0ef8eeaa003bef85364c878fad4a90876de313d156cc863199811"}, + {file = "aws_cdk.aws_ec2-1.124.0-py3-none-any.whl", hash = "sha256:d000d22d87d887dfbc61b82be897234fc58f421b2fbbbc29f002b683b4fdac4f"}, ] "aws-cdk.aws-ecr" = [ - {file = "aws-cdk.aws-ecr-1.115.0.tar.gz", hash = "sha256:3083470a95283a95275e1f2ad30868f3591d0a5bf432cf4bab360dabe4cb2e29"}, - {file = "aws_cdk.aws_ecr-1.115.0-py3-none-any.whl", hash = "sha256:695842b3b892b404c3219d8b44b9ad7a8bf1fd1957abb97c618dba47e050108b"}, + {file = "aws-cdk.aws-ecr-1.124.0.tar.gz", hash = "sha256:cbf940fbb76eb189143df45f67115673faf10a4b8e7f571660822604c9016aad"}, + {file = "aws_cdk.aws_ecr-1.124.0-py3-none-any.whl", hash = "sha256:1661c6f8fd618ac75da7cdefd36adda747218e4fe27faa44b5df62ecabd0b3f3"}, ] "aws-cdk.aws-ecr-assets" = [ - {file = "aws-cdk.aws-ecr-assets-1.115.0.tar.gz", hash = "sha256:5450bbcebb89eff84327246c6049a90adefe73ed194bd62778ffeee6facf9042"}, - {file = "aws_cdk.aws_ecr_assets-1.115.0-py3-none-any.whl", hash = "sha256:8e7e5b2351370b795b12abd0812a3ace241cc46df8d67aecb92410de2bfd7318"}, + {file = "aws-cdk.aws-ecr-assets-1.124.0.tar.gz", hash = "sha256:b2401b111474413436e664c1652d02d6e053ca946cbbe224a4f9c3c6220005df"}, + {file = "aws_cdk.aws_ecr_assets-1.124.0-py3-none-any.whl", hash = "sha256:7dc6b6f262baffa37df3ed898d8ae74ef2384793be822a91b91159cb512183ff"}, ] "aws-cdk.aws-efs" = [ - {file = "aws-cdk.aws-efs-1.115.0.tar.gz", hash = "sha256:eb96d01635283dbee1101fe57e0a19310974c8de02f75d9042adbab44139fe65"}, - {file = "aws_cdk.aws_efs-1.115.0-py3-none-any.whl", hash = "sha256:8e9e3f0f837e1ff3cfe96da5d700095f24d132c11cc7544f7a9f20024fa27372"}, + {file = "aws-cdk.aws-efs-1.124.0.tar.gz", hash = "sha256:90aaccea5ff55ae4a3045540f78e007c048709e142d77947aa15ad655ed4c011"}, + {file = "aws_cdk.aws_efs-1.124.0-py3-none-any.whl", hash = 
"sha256:282db0bd269535fb19f0101d4fa6b9cb7cf7dcddf2eaf5d04d7f03fef156c9d0"}, ] "aws-cdk.aws-events" = [ - {file = "aws-cdk.aws-events-1.115.0.tar.gz", hash = "sha256:4ce7f0e894c61849e8157a0170cb74ec5223d18dc613075912f2ef560974856b"}, - {file = "aws_cdk.aws_events-1.115.0-py3-none-any.whl", hash = "sha256:a817f0f46c027163a30eb5bab254540e00f5e5285bb1e8678dfd724f8f1187c0"}, + {file = "aws-cdk.aws-events-1.124.0.tar.gz", hash = "sha256:0b6b5ffca233c0b5d7abaf011072ca896463ce391242ffdf7bf4def28dec8213"}, + {file = "aws_cdk.aws_events-1.124.0-py3-none-any.whl", hash = "sha256:92ba680941365de0f90ad7881b8c2e787c50b85a69bc32e82b4578a3276f810f"}, ] "aws-cdk.aws-glue" = [ - {file = "aws-cdk.aws-glue-1.115.0.tar.gz", hash = "sha256:a85d344e61cfb3e0953665bcd85fd4b7ac282417fe7099e2c54cc393f62bfa99"}, - {file = "aws_cdk.aws_glue-1.115.0-py3-none-any.whl", hash = "sha256:ca2780bf366ab2ba74adb98b6a49c95ee6e5dbde2bc5758657cb5d4197c996ce"}, + {file = "aws-cdk.aws-glue-1.124.0.tar.gz", hash = "sha256:b43f747a2b8480ca848f7ab27b1dd0c7e352c9602fdb039cfc78f5013dbef450"}, + {file = "aws_cdk.aws_glue-1.124.0-py3-none-any.whl", hash = "sha256:d90bc85ae0d6b03536879d6fa72cdc49cfe1d58451b9e0065786b682dc2f9422"}, ] "aws-cdk.aws-iam" = [ - {file = "aws-cdk.aws-iam-1.115.0.tar.gz", hash = "sha256:fe4e3138d6544755cbeb2400fd770b583b01906443648a4588085de2e781707f"}, - {file = "aws_cdk.aws_iam-1.115.0-py3-none-any.whl", hash = "sha256:7ba923894c6ecce33147527dccbf90fdaecc7a5561b2ca9398623f1f063f898c"}, + {file = "aws-cdk.aws-iam-1.124.0.tar.gz", hash = "sha256:9d779439048832c6f4d5722196a9490d80bb649e56bb4dadc554ea3ae940f797"}, + {file = "aws_cdk.aws_iam-1.124.0-py3-none-any.whl", hash = "sha256:249fc537532f73c3cd3f59dc635be58535d9e9f9418062214eb664e14b59a6be"}, ] "aws-cdk.aws-kms" = [ - {file = "aws-cdk.aws-kms-1.115.0.tar.gz", hash = "sha256:1d1feca56bc4c2de722f59a07ee8dc36b6d7a31d70ffe32de5f76c099b2b6322"}, - {file = "aws_cdk.aws_kms-1.115.0-py3-none-any.whl", hash = 
"sha256:c692b0cebe2b0106ddc0ec3946a895941176b35411d46b27ae9bfb06cdaa9d6d"}, + {file = "aws-cdk.aws-kms-1.124.0.tar.gz", hash = "sha256:205e79bc8f8e009bd1b5df236f0336e977eb141c70575a42080e36829358215f"}, + {file = "aws_cdk.aws_kms-1.124.0-py3-none-any.whl", hash = "sha256:91294f10f02000743eef712da5ba7ea2749b43e4a0ad7d4715c9c95b6a472c10"}, ] "aws-cdk.aws-lambda" = [ - {file = "aws-cdk.aws-lambda-1.115.0.tar.gz", hash = "sha256:11eec3652671f37d261f991eaf963726fed281c5aafe77e9f83afab899398892"}, - {file = "aws_cdk.aws_lambda-1.115.0-py3-none-any.whl", hash = "sha256:65000012469a64096d25614c23e22da74a3d15234925cf44b29fd3d63d21b993"}, + {file = "aws-cdk.aws-lambda-1.124.0.tar.gz", hash = "sha256:801552637c408a693a7b13967da4ec4e8a623f22b90fb0fdfb845c23765e4e29"}, + {file = "aws_cdk.aws_lambda-1.124.0-py3-none-any.whl", hash = "sha256:50d774d026a8a0ca5089df5c8b2c7cc2ef74db2a4b20c5d049210b154d3af03d"}, ] "aws-cdk.aws-logs" = [ - {file = "aws-cdk.aws-logs-1.115.0.tar.gz", hash = "sha256:de30016914a17ca59d55f36029aa10fdc800f8fa69f4a5de822898aebbb29a78"}, - {file = "aws_cdk.aws_logs-1.115.0-py3-none-any.whl", hash = "sha256:8c6adcf54e066a71a6a7031a8592f52f09a01ca0d6a6d1f51080f9996ad7ac52"}, + {file = "aws-cdk.aws-logs-1.124.0.tar.gz", hash = "sha256:2fba565fc4f12b397bd9df1cd9964c1b35ce1ca65cd618407b6b1777bc43d292"}, + {file = "aws_cdk.aws_logs-1.124.0-py3-none-any.whl", hash = "sha256:1f4b1ff436f2d0663e6c76264d7d6ee9dd0d90f3d9c09e5e93f1b0f31abbc379"}, +] +"aws-cdk.aws-opensearchservice" = [ + {file = "aws-cdk.aws-opensearchservice-1.124.0.tar.gz", hash = "sha256:d1bd4ca9ac9cf38b7c04a5e1e63eefe30e6e5e40adc0134e61d468694c71c4b1"}, + {file = "aws_cdk.aws_opensearchservice-1.124.0-py3-none-any.whl", hash = "sha256:170417a55884ac8f26b0ae4cc59c085c8c2a0607b18ca906c1ee4d366b737d85"}, ] "aws-cdk.aws-rds" = [ - {file = "aws-cdk.aws-rds-1.115.0.tar.gz", hash = "sha256:c562843534494ef283474ebd7bba4e44e0b7cb063c0121e20f08ba49749a2a60"}, - {file = 
"aws_cdk.aws_rds-1.115.0-py3-none-any.whl", hash = "sha256:7c00e329b6455b4279ad9880c2e033509b27be63b31626413f28558ae8d24a7f"}, + {file = "aws-cdk.aws-rds-1.124.0.tar.gz", hash = "sha256:20057fc95cda55fc504987dc0395062836dacc72efce2c86051677a1bb6d8d43"}, + {file = "aws_cdk.aws_rds-1.124.0-py3-none-any.whl", hash = "sha256:bd66c0f76548cee6fb1f100f0e36ab9d5933ef70121b072ae05b3dd26e408ff3"}, ] "aws-cdk.aws-redshift" = [ - {file = "aws-cdk.aws-redshift-1.115.0.tar.gz", hash = "sha256:758e6e940e7a432d46d144ebf8002af51fbe98d452221725510f01488847f9a3"}, - {file = "aws_cdk.aws_redshift-1.115.0-py3-none-any.whl", hash = "sha256:311dcb36814434214917ad707689a210016ce1d6286c69d44ec01f5df27a3c7d"}, + {file = "aws-cdk.aws-redshift-1.124.0.tar.gz", hash = "sha256:70cb4700cdfecad592524cd017a4a859b3d4ae407b3d2fcf329022c1d2faf863"}, + {file = "aws_cdk.aws_redshift-1.124.0-py3-none-any.whl", hash = "sha256:4df5c19f74194fb9bd7a56e5b89b9312c35b681a322b0c1b0e248874f628ddc4"}, +] +"aws-cdk.aws-route53" = [ + {file = "aws-cdk.aws-route53-1.124.0.tar.gz", hash = "sha256:c5137b3c5211632b931d7b79234aec6006f72701c68477086e70c213320639ef"}, + {file = "aws_cdk.aws_route53-1.124.0-py3-none-any.whl", hash = "sha256:97fe84e53c26c1a713a3b57341c2ecf488db56cc0b6127975656c53206ccd471"}, ] "aws-cdk.aws-s3" = [ - {file = "aws-cdk.aws-s3-1.115.0.tar.gz", hash = "sha256:73d72900194b944435056faf42c0df21ca7f6a0f941e0bc8d5cdf3de4c0261e9"}, - {file = "aws_cdk.aws_s3-1.115.0-py3-none-any.whl", hash = "sha256:81f85f3c107f05012a351260640a1bb1911106addbd26f2dd2c22d8c44122053"}, + {file = "aws-cdk.aws-s3-1.124.0.tar.gz", hash = "sha256:3047305a4e013cb796532027c14908003ffe7af95fe8e214e3470a32a11c09e6"}, + {file = "aws_cdk.aws_s3-1.124.0-py3-none-any.whl", hash = "sha256:0b08821e3b79c26110068f54aabdb938da55b562dcf2a28a7171d930334ce71a"}, ] "aws-cdk.aws-s3-assets" = [ - {file = "aws-cdk.aws-s3-assets-1.115.0.tar.gz", hash = "sha256:4aa793512b08d73f0bacb71f72f607a510672d077216cdd1ac307c65bd0751ae"}, - {file = 
"aws_cdk.aws_s3_assets-1.115.0-py3-none-any.whl", hash = "sha256:0bb1eea914908a5fc69a505b118e89f7d3097bce309126167b738a0aefd98ec6"}, + {file = "aws-cdk.aws-s3-assets-1.124.0.tar.gz", hash = "sha256:568d4c598319e3bf1869536be0586b1004d3c43c2133ba94bf9cda4ad4ae5d5d"}, + {file = "aws_cdk.aws_s3_assets-1.124.0-py3-none-any.whl", hash = "sha256:125c5e3786f2c233512374080553b2a7592efa6a53203764979a1bb987c47338"}, ] "aws-cdk.aws-sam" = [ - {file = "aws-cdk.aws-sam-1.115.0.tar.gz", hash = "sha256:babca8a6fbf68a32ebf6f1fd54f6a7bc506d60dae007fd6e4b06f1637edd42fd"}, - {file = "aws_cdk.aws_sam-1.115.0-py3-none-any.whl", hash = "sha256:ece50ab527eb1e5f84f6de2ad503e7cd61a2351dfcb6446274f8099ffabfcfc5"}, + {file = "aws-cdk.aws-sam-1.124.0.tar.gz", hash = "sha256:39db01a4d88fd05c57dbc4f0c76c2471eab3e75753febc30f2847c546fa8292b"}, + {file = "aws_cdk.aws_sam-1.124.0-py3-none-any.whl", hash = "sha256:b1ca75d2fb13898ed66cd4ee364cfa0b4f0924ab4583994ec4a7200d10c8c71b"}, ] "aws-cdk.aws-secretsmanager" = [ - {file = "aws-cdk.aws-secretsmanager-1.115.0.tar.gz", hash = "sha256:6de8204e4bbcbe8df8852646933c1d8d8cb1332374baee9fe780bd2b413e2423"}, - {file = "aws_cdk.aws_secretsmanager-1.115.0-py3-none-any.whl", hash = "sha256:0acf55659f67ac43c69be9a17e40e382d6122abc8055f092332723e07db15fd9"}, + {file = "aws-cdk.aws-secretsmanager-1.124.0.tar.gz", hash = "sha256:76d3ded9f20d29520d4e54e15c335718cac4f938aacb4827a2a9f98af417576f"}, + {file = "aws_cdk.aws_secretsmanager-1.124.0-py3-none-any.whl", hash = "sha256:0b6ae44966600943eb66fc48a93a0ae2bac60c8d6a5ff9c687ad9675b9f2bc5f"}, ] "aws-cdk.aws-signer" = [ - {file = "aws-cdk.aws-signer-1.115.0.tar.gz", hash = "sha256:9050e46e059edcde6b8e1d80b0d792eb2b4ad36cc00ce0b284d04a15b019b216"}, - {file = "aws_cdk.aws_signer-1.115.0-py3-none-any.whl", hash = "sha256:3b4b920dd5c8873bb0b60c0d2ae340fad434e7f011296f465d482afc094b25da"}, + {file = "aws-cdk.aws-signer-1.124.0.tar.gz", hash = "sha256:96dd4ae63b43c7c12fde59f7ebbbea1895964a5f08c6e2ca4a2a1062abcc2399"}, + 
{file = "aws_cdk.aws_signer-1.124.0-py3-none-any.whl", hash = "sha256:2fe614e6ce1ea6259d60f3adced41eaefdeace0cf77d961b5fcef815e1f82428"}, +] +"aws-cdk.aws-sns" = [ + {file = "aws-cdk.aws-sns-1.124.0.tar.gz", hash = "sha256:21e838c52cdd9bdcd98fc0fbe16ffad2bf10ba6bf31c5bfcdd9f49a8b3479d0c"}, + {file = "aws_cdk.aws_sns-1.124.0-py3-none-any.whl", hash = "sha256:cb3820fd79643d1c5fb0b69f2b4755900dd16756af0f4c36706d68220a845d8b"}, ] "aws-cdk.aws-sqs" = [ - {file = "aws-cdk.aws-sqs-1.115.0.tar.gz", hash = "sha256:b24e03f0027fd99c6cdfe604e3a2b3d0d203d616dffafc74f74f6715083e2b08"}, - {file = "aws_cdk.aws_sqs-1.115.0-py3-none-any.whl", hash = "sha256:cda589452cb4a6db584050e50f14fbe11757fb0b3aff63f50ae663fad5b7bf27"}, + {file = "aws-cdk.aws-sqs-1.124.0.tar.gz", hash = "sha256:ffed4754784de29473f554e450c6ec1b96c7508a2706406fe8d6442f2a31c58c"}, + {file = "aws_cdk.aws_sqs-1.124.0-py3-none-any.whl", hash = "sha256:382721ca5d82dce9ec2625e5bae26132151748ee60e1269a0aa91cfd03227ee7"}, ] "aws-cdk.aws-ssm" = [ - {file = "aws-cdk.aws-ssm-1.115.0.tar.gz", hash = "sha256:960330865ee74485cab510ba1cac5d8d4578e777f1a421b14e8a20895bbe5ac5"}, - {file = "aws_cdk.aws_ssm-1.115.0-py3-none-any.whl", hash = "sha256:4431c43667b57fe2883a9ef022b277cbd3b62f6ab13cb0b1221513f7f76f2aac"}, + {file = "aws-cdk.aws-ssm-1.124.0.tar.gz", hash = "sha256:bcfc99a5cdf23849503c72d93b9e5734d11976453004f13ebca2a66aeb3df10c"}, + {file = "aws_cdk.aws_ssm-1.124.0-py3-none-any.whl", hash = "sha256:4d7335c2ce0200c1ed347422139c9d9b07c71297253ba911470114277996cc76"}, ] "aws-cdk.cloud-assembly-schema" = [ - {file = "aws-cdk.cloud-assembly-schema-1.115.0.tar.gz", hash = "sha256:d565a8418e0cc05d3471dd48424477528d72bdd7d17adc9a049068559666a3ae"}, - {file = "aws_cdk.cloud_assembly_schema-1.115.0-py3-none-any.whl", hash = "sha256:0686e6f7e5da48dbd2ff724953d51eb0495b6772bdb17400024bb42e6fe05baf"}, + {file = "aws-cdk.cloud-assembly-schema-1.124.0.tar.gz", hash = 
"sha256:d2989a6742ad988fa0f7085ab67fb7ced14f4c3b1a98cc0bf4a0ea1a9358667c"}, + {file = "aws_cdk.cloud_assembly_schema-1.124.0-py3-none-any.whl", hash = "sha256:77d3f63629b7213c639ffd4c46eb63ce9dd048e9a91a045afa72dcce9576ee6b"}, ] "aws-cdk.core" = [ - {file = "aws-cdk.core-1.115.0.tar.gz", hash = "sha256:42a691cc183219ce76eb58e17507edf768a0f5eca0ea98661b4b1f16f178b90d"}, - {file = "aws_cdk.core-1.115.0-py3-none-any.whl", hash = "sha256:93a8e3d87f79af75866bf3f1cfc702dd5664526ec0f70a1c5f7ade82cb1536b1"}, + {file = "aws-cdk.core-1.124.0.tar.gz", hash = "sha256:bbdc1cf5affc34d0caa549771dc6b41ce467744f8ca727b215f0d89b853f4f0c"}, + {file = "aws_cdk.core-1.124.0-py3-none-any.whl", hash = "sha256:56c4549161029c707aa527882e4741fca1ef4c46f63a6417e56e968710cfba7c"}, +] +"aws-cdk.custom-resources" = [ + {file = "aws-cdk.custom-resources-1.124.0.tar.gz", hash = "sha256:d2be1a1636b65e275521970b9c9accd02718f678ebb074a580b15b695e4b60d5"}, + {file = "aws_cdk.custom_resources-1.124.0-py3-none-any.whl", hash = "sha256:6c9abcc046a92dc6845c8a81e33ac727da95e0c0d95b3fba0d433de7dae10a61"}, ] "aws-cdk.cx-api" = [ - {file = "aws-cdk.cx-api-1.115.0.tar.gz", hash = "sha256:10251ef8deaf7acfb7f7356e07c53cd86bbd8725631795e1ce8f8891bcaffad0"}, - {file = "aws_cdk.cx_api-1.115.0-py3-none-any.whl", hash = "sha256:6c03bc14f8d645e63329cb152b2f1fe339a556c297f1c3ecfa75ca9a981f9dca"}, + {file = "aws-cdk.cx-api-1.124.0.tar.gz", hash = "sha256:b8ad4e1a2a5545dd256b50d36efb6d59b9b89b4b1034e7b7f9edfdaa476b181b"}, + {file = "aws_cdk.cx_api-1.124.0-py3-none-any.whl", hash = "sha256:64b6f3ba0313cdea9963f9d210932cf770366a9d860520e1f15e64a26e97c5d6"}, ] "aws-cdk.region-info" = [ - {file = "aws-cdk.region-info-1.115.0.tar.gz", hash = "sha256:4f6b282fa495c244c1f96deea4aed77e702312373204e34b3bba53da27851974"}, - {file = "aws_cdk.region_info-1.115.0-py3-none-any.whl", hash = "sha256:b346bdab4bf54a5956fab020bc085b6c2c304f485dd2d09c8fb586728dfe7c11"}, + {file = "aws-cdk.region-info-1.124.0.tar.gz", hash = 
"sha256:c28d31226f9000db1375044ea22ba496cc75e8c3db6aa1493a687ff0f89ccdae"}, + {file = "aws_cdk.region_info-1.124.0-py3-none-any.whl", hash = "sha256:594b5f275766b22864e6111f194cfe7a12713ffc61963d063ce06812fa484728"}, ] cattrs = [ {file = "cattrs-1.0.0-py2.py3-none-any.whl", hash = "sha256:616972ae3dfa6e623a40ad3cb845420e64942989152774ab055e5c2b2f89f997"}, {file = "cattrs-1.0.0.tar.gz", hash = "sha256:b7ab5cf8ad127c42eefd01410c1c6e28569a45a255ea80ed968511873c433c7a"}, - {file = "cattrs-1.6.0-py3-none-any.whl", hash = "sha256:c8de53900e3acad94ca83750eb12bb38aa85ce9114be47177c943e2f0eca63b0"}, - {file = "cattrs-1.6.0.tar.gz", hash = "sha256:3e2cd5dc8a1006d5da53ddcbf4f0b1dd3a21e294323b257678d0a96721f8253a"}, + {file = "cattrs-1.8.0-py3-none-any.whl", hash = "sha256:901fb2040529ae8fc9d93f48a2cdf7de3e983312ffb2a164ffa4e9847f253af1"}, + {file = "cattrs-1.8.0.tar.gz", hash = "sha256:5c121ab06a7cac494813c228721a7feb5a6423b17316eeaebf13f5a03e5b0d53"}, ] constructs = [ {file = "constructs-3.3.101-py3-none-any.whl", hash = "sha256:0605ea091dda433f0915ba5b3c74bf967d90fb0cf975a5c3b34a7150a3cf48d1"}, @@ -747,8 +917,8 @@ importlib-resources = [ {file = "importlib_resources-5.2.0.tar.gz", hash = "sha256:22a2c42d8c6a1d30aa8a0e1f57293725bfd5c013d562585e46aff469e0ff78b3"}, ] jsii = [ - {file = "jsii-1.32.0-py3-none-any.whl", hash = "sha256:c71321c4b74ed2c29edc9943c22a36c60a8626df6e0a7173b9ae41366b1a9cb9"}, - {file = "jsii-1.32.0.tar.gz", hash = "sha256:b95e7747812e16cafbfde80b714d9b684c7a4ee57a00cbaf8f138d5868bdb2ae"}, + {file = "jsii-1.34.0-py3-none-any.whl", hash = "sha256:d0a703d0d44bf78bb90529699599d2a58a68ca764f996808e97eafc68e2467de"}, + {file = "jsii-1.34.0.tar.gz", hash = "sha256:e72ba5fafabdd5b6a3a65bd2cf42302eb87f2fe7c6339bddb808226a91623654"}, ] publication = [ {file = "publication-0.0.3-py2.py3-none-any.whl", hash = "sha256:0248885351febc11d8a1098d5c8e3ab2dabcf3e8c0c96db1e17ecd12b53afbe6"}, diff --git a/test_infra/pyproject.toml b/test_infra/pyproject.toml index 
e6dda67cb..02e0241d8 100644 --- a/test_infra/pyproject.toml +++ b/test_infra/pyproject.toml @@ -7,14 +7,15 @@ license = "Apache License 2.0" [tool.poetry.dependencies] python = ">=3.6.2, <3.10" -"aws-cdk.core" = "^1.115.0" -"aws-cdk.aws-ec2" = "^1.115.0" -"aws-cdk.aws-glue" = "^1.115.0" -"aws-cdk.aws-iam" = "^1.115.0" -"aws-cdk.aws-kms" = "^1.115.0" -"aws-cdk.aws-logs" = "^1.115.0" -"aws-cdk.aws-s3" = "^1.115.0" -"aws-cdk.aws-redshift" = "^1.115.0" -"aws-cdk.aws-rds" = "^1.115.0" -"aws-cdk.aws-secretsmanager" = "^1.115.0" -"aws-cdk.aws-ssm" = "^1.115.0" +"aws-cdk.core" = "^1.124.0" +"aws-cdk.aws-ec2" = "^1.124.0" +"aws-cdk.aws-glue" = "^1.124.0" +"aws-cdk.aws-iam" = "^1.124.0" +"aws-cdk.aws-kms" = "^1.124.0" +"aws-cdk.aws-logs" = "^1.124.0" +"aws-cdk.aws-s3" = "^1.124.0" +"aws-cdk.aws-redshift" = "^1.124.0" +"aws-cdk.aws-rds" = "^1.124.0" +"aws-cdk.aws-secretsmanager" = "^1.124.0" +"aws-cdk.aws-ssm" = "^1.124.0" +"aws-cdk.aws-opensearchservice" = "^1.124.0" diff --git a/test_infra/scripts/delete-opensearch.sh b/test_infra/scripts/delete-opensearch.sh new file mode 100755 index 000000000..1c1c01ba2 --- /dev/null +++ b/test_infra/scripts/delete-opensearch.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +set -e + +pushd .. +cdk destroy aws-data-wrangler-opensearch +popd diff --git a/test_infra/scripts/deploy-opensearch.sh b/test_infra/scripts/deploy-opensearch.sh new file mode 100755 index 000000000..e94818af4 --- /dev/null +++ b/test_infra/scripts/deploy-opensearch.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -e + +pushd .. 
+cdk bootstrap +cdk deploy aws-data-wrangler-opensearch +popd diff --git a/test_infra/stacks/opensearch_stack.py b/test_infra/stacks/opensearch_stack.py new file mode 100644 index 000000000..f3bc6a1f8 --- /dev/null +++ b/test_infra/stacks/opensearch_stack.py @@ -0,0 +1,105 @@ +from aws_cdk import aws_ec2 as ec2 +from aws_cdk import aws_iam as iam +from aws_cdk import aws_kms as kms +from aws_cdk import aws_opensearchservice as opensearch +from aws_cdk import aws_s3 as s3 +from aws_cdk import aws_secretsmanager as secrets +from aws_cdk import core as cdk + + +def validate_domain_name(name: str): + if not 3 <= len(name) <= 28: + raise ValueError(f"invalid domain name ({name}) - bad length ({len(name)})") + for c in name: + if not ("a" <= c <= "z" or c.isdigit() or c in ["-"]): + raise ValueError(f'invalid domain name ({name}) - bad character ("{c}")') + + +class OpenSearchStack(cdk.Stack): # type: ignore + def __init__( + self, + scope: cdk.Construct, + construct_id: str, + vpc: ec2.IVpc, + bucket: s3.IBucket, + key: kms.Key, + **kwargs: str, + ) -> None: + """ + AWS Data Wrangler Development OpenSearch Infrastructure. + Includes OpenSearch, Elasticsearch, ... 
+ """ + super().__init__(scope, construct_id, **kwargs) + + self.vpc = vpc + self.key = key + self.bucket = bucket + + self._set_opensearch_infra() + self._setup_opensearch_1_0() + self._setup_elasticsearch_7_10_fgac() + + def _set_opensearch_infra(self) -> None: + self.username = "test" + # fmt: off + self.password_secret = secrets.Secret( + self, + "opensearch-password-secret", + secret_name="aws-data-wrangler/opensearch_password", + generate_secret_string=secrets.SecretStringGenerator(exclude_characters="/@\"\' \\"), + ).secret_value + # fmt: on + self.password = self.password_secret.to_string() + + def _setup_opensearch_1_0(self) -> None: + domain_name = "wrangler-os-1-0" + validate_domain_name(domain_name) + domain_arn = f"arn:aws:es:{self.region}:{self.account}:domain/{domain_name}" + domain = opensearch.Domain( + self, + domain_name, + domain_name=domain_name, + version=opensearch.EngineVersion.OPENSEARCH_1_0, + capacity=opensearch.CapacityConfig(data_node_instance_type="t3.small.search", data_nodes=1), + access_policies=[ + iam.PolicyStatement( + effect=iam.Effect.ALLOW, + actions=["es:*"], + principals=[iam.AccountRootPrincipal()], + resources=[f"{domain_arn}/*"], + ) + ], + removal_policy=cdk.RemovalPolicy.DESTROY, + ) + + cdk.CfnOutput(self, f"DomainEndpoint-{domain_name}", value=domain.domain_endpoint) + + def _setup_elasticsearch_7_10_fgac(self) -> None: + domain_name = "wrangler-es-7-10-fgac" + validate_domain_name(domain_name) + domain_arn = f"arn:aws:es:{self.region}:{self.account}:domain/{domain_name}" + domain = opensearch.Domain( + self, + domain_name, + domain_name=domain_name, + version=opensearch.EngineVersion.ELASTICSEARCH_7_10, + capacity=opensearch.CapacityConfig(data_node_instance_type="t3.small.search", data_nodes=1), + access_policies=[ + iam.PolicyStatement( + effect=iam.Effect.ALLOW, + actions=["es:*"], + principals=[iam.AnyPrincipal()], # FGACs + resources=[f"{domain_arn}/*"], + ) + ], + 
fine_grained_access_control=opensearch.AdvancedSecurityOptions( + master_user_name=self.username, + master_user_password=self.password_secret, + ), + node_to_node_encryption=True, + encryption_at_rest=opensearch.EncryptionAtRestOptions(enabled=True, kms_key=self.key), + enforce_https=True, + removal_policy=cdk.RemovalPolicy.DESTROY, + ) + + cdk.CfnOutput(self, f"DomainEndpoint-{domain_name}", value=domain.domain_endpoint) diff --git a/tests/_utils.py b/tests/_utils.py index 85df69484..5f74c4e83 100644 --- a/tests/_utils.py +++ b/tests/_utils.py @@ -528,9 +528,10 @@ def extract_cloudformation_outputs(): client = boto3.client("cloudformation") response = try_it(client.describe_stacks, botocore.exceptions.ClientError, max_num_tries=5) for stack in response.get("Stacks"): - if (stack["StackName"] in ["aws-data-wrangler-base", "aws-data-wrangler-databases"]) and ( - stack["StackStatus"] in CFN_VALID_STATUS - ): + if ( + stack["StackName"] + in ["aws-data-wrangler-base", "aws-data-wrangler-databases", "aws-data-wrangler-opensearch"] + ) and (stack["StackStatus"] in CFN_VALID_STATUS): for output in stack.get("Outputs"): outputs[output.get("OutputKey")] = output.get("OutputValue") return outputs diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py new file mode 100644 index 000000000..345d248e3 --- /dev/null +++ b/tests/test_opensearch.py @@ -0,0 +1,358 @@ +import json +import logging +import tempfile +import time + +import boto3 +import pandas as pd +import pytest # type: ignore + +import awswrangler as wr + +from ._utils import extract_cloudformation_outputs + +logging.getLogger("awswrangler").setLevel(logging.DEBUG) + + +inspections_documents = [ + { + "business_address": "315 California St", + "business_city": "San Francisco", + "business_id": "24936", + "business_latitude": "37.793199", + "business_location": {"lon": -122.400152, "lat": 37.793199}, + "business_longitude": "-122.400152", + "business_name": "San Francisco Soup Company", + 
"business_postal_code": "94104", + "business_state": "CA", + "inspection_date": "2016-06-09T00:00:00.000", + "inspection_id": "24936_20160609", + "inspection_score": 77, + "inspection_type": "Routine - Unscheduled", + "risk_category": "Low Risk", + "violation_description": "Improper food labeling or menu misrepresentation", + "violation_id": "24936_20160609_103141", + }, + { + "business_address": "10 Mason St", + "business_city": "San Francisco", + "business_id": "60354", + "business_latitude": "37.783527", + "business_location": {"lon": -122.409061, "lat": 37.783527}, + "business_longitude": "-122.409061", + "business_name": "Soup Unlimited", + "business_postal_code": "94102", + "business_state": "CA", + "inspection_date": "2016-11-23T00:00:00.000", + "inspection_id": "60354_20161123", + "inspection_type": "Routine", + "inspection_score": 95, + }, + { + "business_address": "2872 24th St", + "business_city": "San Francisco", + "business_id": "1797", + "business_latitude": "37.752807", + "business_location": {"lon": -122.409752, "lat": 37.752807}, + "business_longitude": "-122.409752", + "business_name": "TIO CHILOS GRILL", + "business_postal_code": "94110", + "business_state": "CA", + "inspection_date": "2016-07-05T00:00:00.000", + "inspection_id": "1797_20160705", + "inspection_score": 90, + "inspection_type": "Routine - Unscheduled", + "risk_category": "Low Risk", + "violation_description": "Unclean nonfood contact surfaces", + "violation_id": "1797_20160705_103142", + }, + { + "business_address": "1661 Tennessee St Suite 3B", + "business_city": "San Francisco Whard Restaurant", + "business_id": "66198", + "business_latitude": "37.75072", + "business_location": {"lon": -122.388478, "lat": 37.75072}, + "business_longitude": "-122.388478", + "business_name": "San Francisco Restaurant", + "business_postal_code": "94107", + "business_state": "CA", + "inspection_date": "2016-05-27T00:00:00.000", + "inspection_id": "66198_20160527", + "inspection_type": "Routine", + 
"inspection_score": 56, + }, + { + "business_address": "2162 24th Ave", + "business_city": "San Francisco", + "business_id": "5794", + "business_latitude": "37.747228", + "business_location": {"lon": -122.481299, "lat": 37.747228}, + "business_longitude": "-122.481299", + "business_name": "Soup House", + "business_phone_number": "+14155752700", + "business_postal_code": "94116", + "business_state": "CA", + "inspection_date": "2016-09-07T00:00:00.000", + "inspection_id": "5794_20160907", + "inspection_score": 96, + "inspection_type": "Routine - Unscheduled", + "risk_category": "Low Risk", + "violation_description": "Unapproved or unmaintained equipment or utensils", + "violation_id": "5794_20160907_103144", + }, + { + "business_address": "2162 24th Ave", + "business_city": "San Francisco", + "business_id": "5794", + "business_latitude": "37.747228", + "business_location": {"lon": -122.481299, "lat": 37.747228}, + "business_longitude": "-122.481299", + "business_name": "Soup-or-Salad", + "business_phone_number": "+14155752700", + "business_postal_code": "94116", + "business_state": "CA", + "inspection_date": "2016-09-07T00:00:00.000", + "inspection_id": "5794_20160907", + "inspection_score": 96, + "inspection_type": "Routine - Unscheduled", + "risk_category": "Low Risk", + "violation_description": "Unapproved or unmaintained equipment or utensils", + "violation_id": "5794_20160907_103144", + }, +] + + +@pytest.fixture(scope="session") +def cloudformation_outputs(): + return extract_cloudformation_outputs() + + +@pytest.fixture(scope="session") +def opensearch_password(): + return boto3.client("secretsmanager").get_secret_value(SecretId="aws-data-wrangler/opensearch_password")[ + "SecretString" + ] + + +@pytest.fixture(scope="session") +def domain_endpoint_opensearch_1_0(cloudformation_outputs): + return cloudformation_outputs["DomainEndpointwrangleros10"] + + +@pytest.fixture(scope="session") +def domain_endpoint_elasticsearch_7_10_fgac(cloudformation_outputs): + 
return cloudformation_outputs["DomainEndpointwrangleres710fgac"] + + +def test_connection_opensearch_1_0(domain_endpoint_opensearch_1_0): + client = wr.opensearch.connect(host=domain_endpoint_opensearch_1_0) + print(client.info()) + assert len(client.info()) > 0 + + +def test_connection_opensearch_1_0_https(domain_endpoint_opensearch_1_0): + client = wr.opensearch.connect(host=f"https://{domain_endpoint_opensearch_1_0}") + print(client.info()) + assert len(client.info()) > 0 + + +def test_connection_elasticsearch_7_10_fgac(domain_endpoint_elasticsearch_7_10_fgac, opensearch_password): + client = wr.opensearch.connect( + host=domain_endpoint_elasticsearch_7_10_fgac, username="test", password=opensearch_password + ) + print(client.info()) + assert len(client.info()) > 0 + + +@pytest.fixture(scope="session") +def opensearch_1_0_client(domain_endpoint_opensearch_1_0): + client = wr.opensearch.connect(host=domain_endpoint_opensearch_1_0) + return client + + +@pytest.fixture(scope="session") +def elasticsearch_7_10_fgac_client(domain_endpoint_elasticsearch_7_10_fgac, opensearch_password): + client = wr.opensearch.connect( + host=domain_endpoint_elasticsearch_7_10_fgac, username="test", password=opensearch_password + ) + return client + + +# testing multiple versions +@pytest.fixture(params=["opensearch_1_0_client", "elasticsearch_7_10_fgac_client"]) +def client(request): + return request.getfixturevalue(request.param) + + +def test_create_index(client): + index = "test_create_index" + wr.opensearch.delete_index(client, index) + time.sleep(0.5) # let the cluster clean up + response = wr.opensearch.create_index( + client=client, + index=index, + mappings={"properties": {"name": {"type": "text"}, "age": {"type": "integer"}}}, + settings={"index": {"number_of_shards": 1, "number_of_replicas": 1}}, + ) + assert response.get("acknowledged", False) is True + + +def test_delete_index(client): + index = "test_delete_index" + wr.opensearch.create_index(client, index=index) + 
response = wr.opensearch.delete_index(client, index=index) + print(response) + assert response.get("acknowledged", False) is True + + +def test_index_df(client): + response = wr.opensearch.index_df( + client, + df=pd.DataFrame([{"_id": "1", "name": "John"}, {"_id": "2", "name": "George"}, {"_id": "3", "name": "Julia"}]), + index="test_index_df1", + ) + print(response) + assert response.get("success", 0) == 3 + + +def test_index_documents(client): + response = wr.opensearch.index_documents( + client, + documents=[{"_id": "1", "name": "John"}, {"_id": "2", "name": "George"}, {"_id": "3", "name": "Julia"}], + index="test_index_documents1", + ) + print(response) + assert response.get("success", 0) == 3 + + +def test_index_documents_id_keys(client): + response = wr.opensearch.index_documents( + client, documents=inspections_documents, index="test_index_documents_id_keys", id_keys=["inspection_id"] + ) + print(response) + + +def test_index_documents_no_id_keys(client): + response = wr.opensearch.index_documents( + client, documents=inspections_documents, index="test_index_documents_no_id_keys" + ) + print(response) + + +def test_search(client): + index = "test_search" + wr.opensearch.index_documents( + client, documents=inspections_documents, index=index, id_keys=["inspection_id"], refresh="wait_for" + ) + df = wr.opensearch.search( + client, + index=index, + search_body={"query": {"match": {"business_name": "soup"}}}, + _source=["inspection_id", "business_name", "business_location"], + ) + + print("") + print(df.to_string()) + assert df.shape[0] == 3 + + +def test_search_filter_path(client): + index = "test_search" + wr.opensearch.index_documents( + client, documents=inspections_documents, index=index, id_keys=["inspection_id"], refresh="wait_for" + ) + df = wr.opensearch.search( + client, + index=index, + search_body={"query": {"match": {"business_name": "soup"}}}, + _source=["inspection_id", "business_name", "business_location"], + filter_path=["hits.hits._source"], + 
) + + print("") + print(df.to_string()) + assert df.shape[0] == 3 + + +def test_search_scroll(client): + index = "test_search_scroll" + wr.opensearch.index_documents( + client, documents=inspections_documents, index=index, id_keys=["inspection_id"], refresh="wait_for" + ) + df = wr.opensearch.search( + client, index=index, is_scroll=True, _source=["inspection_id", "business_name", "business_location"] + ) + + print("") + print(df.to_string()) + assert df.shape[0] == 5 + + +def test_search_sql(client): + index = "test_search_sql" + wr.opensearch.index_documents( + client, documents=inspections_documents, index=index, id_keys=["inspection_id"], refresh="wait_for" + ) + df = wr.opensearch.search_by_sql(client, sql_query=f"select * from {index}") + + print("") + print(df.to_string()) + assert df.shape[0] == 5 + + +def test_index_json_local(client): + file_path = f"{tempfile.gettempdir()}/inspections.json" + with open(file_path, "w") as filehandle: + for doc in inspections_documents: + filehandle.write("%s\n" % json.dumps(doc)) + response = wr.opensearch.index_json(client, index="test_index_json_local", path=file_path) + print(response) + assert response.get("success", 0) == 6 + + +def test_index_json_s3(client, path): + file_path = f"{tempfile.gettempdir()}/inspections.json" + with open(file_path, "w") as filehandle: + for doc in inspections_documents: + filehandle.write("%s\n" % json.dumps(doc)) + s3 = boto3.client("s3") + path = f"{path}opensearch/inspections.json" + bucket, key = wr._utils.parse_path(path) + s3.upload_file(file_path, bucket, key) + response = wr.opensearch.index_json(client, index="test_index_json_s3", path=path) + print(response) + assert response.get("success", 0) == 6 + + +def test_index_csv_local(client): + file_path = f"{tempfile.gettempdir()}/inspections.csv" + index = "test_index_csv_local" + df = pd.DataFrame(inspections_documents) + df.to_csv(file_path, index=False) + response = wr.opensearch.index_csv(client, path=file_path, index=index) + 
print(response) + assert response.get("success", 0) == 6 + + +def test_index_csv_s3(client, path): + file_path = f"{tempfile.gettempdir()}/inspections.csv" + index = "test_index_csv_s3" + df = pd.DataFrame(inspections_documents) + df.to_csv(file_path, index=False) + s3 = boto3.client("s3") + path = f"{path}opensearch/inspections.csv" + bucket, key = wr._utils.parse_path(path) + s3.upload_file(file_path, bucket, key) + response = wr.opensearch.index_csv(client, path=path, index=index) + print(response) + assert response.get("success", 0) == 6 + + +@pytest.mark.skip(reason="takes a long time (~5 mins) since testing against small clusters") +def test_index_json_s3_large_file(client): + path = "s3://irs-form-990/index_2011.json" + response = wr.opensearch.index_json( + client, index="test_index_json_s3_large_file", path=path, json_path="Filings2011", id_keys=["EIN"], bulk_size=20 + ) + print(response) + assert response.get("success", 0) > 0 diff --git a/tutorials/031 - OpenSearch.ipynb b/tutorials/031 - OpenSearch.ipynb new file mode 100644 index 000000000..afe254669 --- /dev/null +++ b/tutorials/031 - OpenSearch.ipynb @@ -0,0 +1,1668 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![AWS Data Wrangler](_static/logo.png \"AWS Data Wrangler\")](https://github.com/awslabs/aws-data-wrangler)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 31 - OpenSearch" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Table of Contents\n", + "* [1. Initialize](#initialize)\n", + " * [Connect to your Amazon OpenSearch domain](#connect)\n", + " * [Enter your bucket name](#bucket)\n", + " * [Initialize sample data](#sample-data)\n", + "* [2. Indexing (load)](#indexing)\n", + "\t* [Index documents (no Pandas)](#index-documents)\n", + "\t* [Index json file](#index-json)\n", + " * [Index CSV](#index-csv)\n", + "* [3. 
Search](#search)\n", + "\t* [3.1 Search by DSL](#search-dsl)\n", + "\t* [3.2 Search by SQL](#search-sql)\n", + "* [4. Delete Indices](#delete-index)\n", + "* [5. Bonus - Prepare data and index from DataFrame](#bonus)\n", + "\t* [Prepare the data for indexing](#prepare-data)\n", + " * [Create index with mapping](#create-index-w-mapping)\n", + " * [Index dataframe](#index-df)\n", + " * [Execute geo query](#search-geo)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Initialize" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import awswrangler as wr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Connect to your Amazon OpenSearch domain" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "client = wr.opensearch.connect(\n", + " host='OPENSEARCH-ENDPOINT',\n", + "# username='FGAC-USERNAME(OPTIONAL)',\n", + "# password='FGAC-PASSWORD(OPTIONAL)'\n", + ")\n", + "client.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Enter your bucket name" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "bucket = 'BUCKET'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialize sample data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "sf_restaurants_inspections = [\n", + " {\n", + " \"inspection_id\": \"24936_20160609\",\n", + " \"business_address\": \"315 California St\",\n", + " \"business_city\": \"San Francisco\",\n", + " \"business_id\": \"24936\",\n", + " \"business_location\": {\"lon\": -122.400152, \"lat\": 37.793199},\n", + " \"business_name\": \"San Francisco Soup Company\",\n", + " \"business_postal_code\": \"94104\",\n", + " \"business_state\": \"CA\",\n", + " \"inspection_date\": 
\"2016-06-09T00:00:00.000\",\n", + " \"inspection_score\": 77,\n", + " \"inspection_type\": \"Routine - Unscheduled\",\n", + " \"risk_category\": \"Low Risk\",\n", + " \"violation_description\": \"Improper food labeling or menu misrepresentation\",\n", + " \"violation_id\": \"24936_20160609_103141\",\n", + " },\n", + " {\n", + " \"inspection_id\": \"60354_20161123\",\n", + " \"business_address\": \"10 Mason St\",\n", + " \"business_city\": \"San Francisco\",\n", + " \"business_id\": \"60354\",\n", + " \"business_location\": {\"lon\": -122.409061, \"lat\": 37.783527},\n", + " \"business_name\": \"Soup Unlimited\",\n", + " \"business_postal_code\": \"94102\",\n", + " \"business_state\": \"CA\",\n", + " \"inspection_date\": \"2016-11-23T00:00:00.000\",\n", + " \"inspection_type\": \"Routine\",\n", + " \"inspection_score\": 95,\n", + " },\n", + " {\n", + " \"inspection_id\": \"1797_20160705\",\n", + " \"business_address\": \"2872 24th St\",\n", + " \"business_city\": \"San Francisco\",\n", + " \"business_id\": \"1797\",\n", + " \"business_location\": {\"lon\": -122.409752, \"lat\": 37.752807},\n", + " \"business_name\": \"TIO CHILOS GRILL\",\n", + " \"business_postal_code\": \"94110\",\n", + " \"business_state\": \"CA\",\n", + " \"inspection_date\": \"2016-07-05T00:00:00.000\",\n", + " \"inspection_score\": 90,\n", + " \"inspection_type\": \"Routine - Unscheduled\",\n", + " \"risk_category\": \"Low Risk\",\n", + " \"violation_description\": \"Unclean nonfood contact surfaces\",\n", + " \"violation_id\": \"1797_20160705_103142\",\n", + " },\n", + " {\n", + " \"inspection_id\": \"66198_20160527\",\n", + " \"business_address\": \"1661 Tennessee St Suite 3B\",\n", + " \"business_city\": \"San Francisco Whard Restaurant\",\n", + " \"business_id\": \"66198\",\n", + " \"business_location\": {\"lon\": -122.388478, \"lat\": 37.75072},\n", + " \"business_name\": \"San Francisco Restaurant\",\n", + " \"business_postal_code\": \"94107\",\n", + " \"business_state\": \"CA\",\n", + " 
\"inspection_date\": \"2016-05-27T00:00:00.000\",\n", + " \"inspection_type\": \"Routine\",\n", + " \"inspection_score\": 56,\n", + " },\n", + " {\n", + " \"inspection_id\": \"5794_20160907\",\n", + " \"business_address\": \"2162 24th Ave\",\n", + " \"business_city\": \"San Francisco\",\n", + " \"business_id\": \"5794\",\n", + " \"business_location\": {\"lon\": -122.481299, \"lat\": 37.747228},\n", + " \"business_name\": \"Soup House\",\n", + " \"business_phone_number\": \"+14155752700\",\n", + " \"business_postal_code\": \"94116\",\n", + " \"business_state\": \"CA\",\n", + " \"inspection_date\": \"2016-09-07T00:00:00.000\",\n", + " \"inspection_score\": 96,\n", + " \"inspection_type\": \"Routine - Unscheduled\",\n", + " \"risk_category\": \"Low Risk\",\n", + " \"violation_description\": \"Unapproved or unmaintained equipment or utensils\",\n", + " \"violation_id\": \"5794_20160907_103144\",\n", + " },\n", + " \n", + " # duplicate record\n", + " {\n", + " \"inspection_id\": \"5794_20160907\",\n", + " \"business_address\": \"2162 24th Ave\",\n", + " \"business_city\": \"San Francisco\",\n", + " \"business_id\": \"5794\",\n", + " \"business_location\": {\"lon\": -122.481299, \"lat\": 37.747228},\n", + " \"business_name\": \"Soup-or-Salad\",\n", + " \"business_phone_number\": \"+14155752700\",\n", + " \"business_postal_code\": \"94116\",\n", + " \"business_state\": \"CA\",\n", + " \"inspection_date\": \"2016-09-07T00:00:00.000\",\n", + " \"inspection_score\": 96,\n", + " \"inspection_type\": \"Routine - Unscheduled\",\n", + " \"risk_category\": \"Low Risk\",\n", + " \"violation_description\": \"Unapproved or unmaintained equipment or utensils\",\n", + " \"violation_id\": \"5794_20160907_103144\",\n", + " },\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
Indexing (load)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Index documents (no Pandas)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Indexing: 100% (6/6)|####################################|Elapsed Time: 0:00:01" + ] + }, + { + "data": { + "text/plain": [ + "{'success': 6, 'errors': []}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# index documents w/o providing keys (_id is auto-generated)\n", + "wr.opensearch.index_documents(\n", + " client,\n", + " documents=sf_restaurants_inspections,\n", + " index=\"sf_restaurants_inspections\" \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idbusiness_nameinspection_idbusiness_location.lonbusiness_location.lat
0663dd72d-0da4-495b-b0ae-ed000105ae73TIO CHILOS GRILL1797_20160705-122.40975237.752807
1ff2f50f6-5415-4706-9bcb-af7c5eb0afa3Soup House5794_20160907-122.48129937.747228
2b9e8f6a2-8fd1-4660-b041-2997a1a80984San Francisco Soup Company24936_20160609-122.40015237.793199
356b352e6-102b-4eff-8296-7e1fb2459babSoup Unlimited60354_20161123-122.40906137.783527
46fec5411-f79a-48e4-be7b-e0e44d5ebbabSan Francisco Restaurant66198_20160527-122.38847837.750720
57ba4fb17-f9a9-49da-b90e-8b3553d6d97cSoup-or-Salad5794_20160907-122.48129937.747228
\n", + "
" + ], + "text/plain": [ + " _id business_name \\\n", + "0 663dd72d-0da4-495b-b0ae-ed000105ae73 TIO CHILOS GRILL \n", + "1 ff2f50f6-5415-4706-9bcb-af7c5eb0afa3 Soup House \n", + "2 b9e8f6a2-8fd1-4660-b041-2997a1a80984 San Francisco Soup Company \n", + "3 56b352e6-102b-4eff-8296-7e1fb2459bab Soup Unlimited \n", + "4 6fec5411-f79a-48e4-be7b-e0e44d5ebbab San Francisco Restaurant \n", + "5 7ba4fb17-f9a9-49da-b90e-8b3553d6d97c Soup-or-Salad \n", + "\n", + " inspection_id business_location.lon business_location.lat \n", + "0 1797_20160705 -122.409752 37.752807 \n", + "1 5794_20160907 -122.481299 37.747228 \n", + "2 24936_20160609 -122.400152 37.793199 \n", + "3 60354_20161123 -122.409061 37.783527 \n", + "4 66198_20160527 -122.388478 37.750720 \n", + "5 5794_20160907 -122.481299 37.747228 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# read all documents. There are total 6 documents\n", + "wr.opensearch.search(\n", + " client,\n", + " index=\"sf_restaurants_inspections\",\n", + " _source=[\"inspection_id\", \"business_name\", \"business_location\"]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Index json file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "df = pd.DataFrame(sf_restaurants_inspections)\n", + "path = f\"s3://{bucket}/json/sf_restaurants_inspections.json\"\n", + "wr.s3.to_json(df, path,orient='records',lines=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Indexing: 100% (6/6)|####################################|Elapsed Time: 0:00:00" + ] + }, + { + "data": { + "text/plain": [ + "{'success': 6, 'errors': []}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# index json w/ 
providing keys\n", + "wr.opensearch.index_json(\n", + " client,\n", + " path=path, # path can be s3 or local\n", + " index=\"sf_restaurants_inspections_dedup\",\n", + " id_keys=[\"inspection_id\"] # can be multiple fields. arg applicable to all index_* functions\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idbusiness_nameinspection_idbusiness_location.lonbusiness_location.lat
024936_20160609San Francisco Soup Company24936_20160609-122.40015237.793199
166198_20160527San Francisco Restaurant66198_20160527-122.38847837.750720
25794_20160907Soup-or-Salad5794_20160907-122.48129937.747228
360354_20161123Soup Unlimited60354_20161123-122.40906137.783527
41797_20160705TIO CHILOS GRILL1797_20160705-122.40975237.752807
\n", + "
" + ], + "text/plain": [ + " _id business_name inspection_id \\\n", + "0 24936_20160609 San Francisco Soup Company 24936_20160609 \n", + "1 66198_20160527 San Francisco Restaurant 66198_20160527 \n", + "2 5794_20160907 Soup-or-Salad 5794_20160907 \n", + "3 60354_20161123 Soup Unlimited 60354_20161123 \n", + "4 1797_20160705 TIO CHILOS GRILL 1797_20160705 \n", + "\n", + " business_location.lon business_location.lat \n", + "0 -122.400152 37.793199 \n", + "1 -122.388478 37.750720 \n", + "2 -122.481299 37.747228 \n", + "3 -122.409061 37.783527 \n", + "4 -122.409752 37.752807 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# now there are no duplicates. There are total 5 documents\n", + "wr.opensearch.search(\n", + " client,\n", + " index=\"sf_restaurants_inspections_dedup\",\n", + " _source=[\"inspection_id\", \"business_name\", \"business_location\"]\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Index CSV" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Indexing: 100% (1000/1000)|##############################|Elapsed Time: 0:00:00" + ] + }, + { + "data": { + "text/plain": [ + "{'success': 1000, 'errors': []}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wr.opensearch.index_csv(\n", + " client, \n", + " index=\"nyc_restaurants_inspections_sample\", \n", + " path='https://data.cityofnewyork.us/api/views/43nn-pn8j/rows.csv?accessType=DOWNLOAD', # index_csv supports local, s3 and url path\n", + " id_keys=[\"CAMIS\"],\n", + " pandas_kwargs={'na_filter': True, 'nrows': 1000}, # pandas.read_csv() args - https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html\n", + " bulk_size=500 # modify based on your cluster size\n", + ")" + ] + }, + { + "cell_type": "code", + 
"execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idCAMISDBABOROBUILDINGSTREETZIPCODEPHONECUISINE DESCRIPTIONINSPECTION DATE...RECORD DATEINSPECTION TYPELatitudeLongitudeCommunity BoardCouncil DistrictCensus TractBINBBLNTA
04161042641610426GLOW THAI RESTAURANTBrooklyn71073 AVENUE11209.07187481920Thai02/26/2020...10/04/2021Cycle Inspection / Re-inspection40.633865-74.026798310.043.06800.03146519.03.058910e+09BK31
14081116240811162CARMINE'SManhattan2450BROADWAY10024.02123622200Italian05/28/2019...10/04/2021Cycle Inspection / Initial Inspection40.791168-73.974308107.06.017900.01033560.01.012380e+09MN12
25001211350012113TANGQueens196-50NORTHERN BOULEVARD11358.07182797080Korean08/16/2018...10/04/2021Cycle Inspection / Initial Inspection40.757850-73.784593411.019.0145101.04124565.04.055200e+09QN48
35001461850014618TOTTO RAMENManhattan248EAST 52 STREET10022.02124210052Japanese08/20/2018...10/04/2021Cycle Inspection / Re-inspection40.756596-73.968749106.04.09800.01038490.01.013250e+09MN19
45004578250045782OLLIE'S CHINESE RESTAURANTManhattan2705BROADWAY10025.02129323300Chinese10/21/2019...10/04/2021Cycle Inspection / Re-inspection40.799318-73.968440107.06.019100.01056562.01.018750e+09MN12
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " _id CAMIS DBA BORO BUILDING \\\n", + "0 41610426 41610426 GLOW THAI RESTAURANT Brooklyn 7107 \n", + "1 40811162 40811162 CARMINE'S Manhattan 2450 \n", + "2 50012113 50012113 TANG Queens 196-50 \n", + "3 50014618 50014618 TOTTO RAMEN Manhattan 248 \n", + "4 50045782 50045782 OLLIE'S CHINESE RESTAURANT Manhattan 2705 \n", + "\n", + " STREET ZIPCODE PHONE CUISINE DESCRIPTION \\\n", + "0 3 AVENUE 11209.0 7187481920 Thai \n", + "1 BROADWAY 10024.0 2123622200 Italian \n", + "2 NORTHERN BOULEVARD 11358.0 7182797080 Korean \n", + "3 EAST 52 STREET 10022.0 2124210052 Japanese \n", + "4 BROADWAY 10025.0 2129323300 Chinese \n", + "\n", + " INSPECTION DATE ... RECORD DATE INSPECTION TYPE \\\n", + "0 02/26/2020 ... 10/04/2021 Cycle Inspection / Re-inspection \n", + "1 05/28/2019 ... 10/04/2021 Cycle Inspection / Initial Inspection \n", + "2 08/16/2018 ... 10/04/2021 Cycle Inspection / Initial Inspection \n", + "3 08/20/2018 ... 10/04/2021 Cycle Inspection / Re-inspection \n", + "4 10/21/2019 ... 10/04/2021 Cycle Inspection / Re-inspection \n", + "\n", + " Latitude Longitude Community Board Council District Census Tract \\\n", + "0 40.633865 -74.026798 310.0 43.0 6800.0 \n", + "1 40.791168 -73.974308 107.0 6.0 17900.0 \n", + "2 40.757850 -73.784593 411.0 19.0 145101.0 \n", + "3 40.756596 -73.968749 106.0 4.0 9800.0 \n", + "4 40.799318 -73.968440 107.0 6.0 19100.0 \n", + "\n", + " BIN BBL NTA \n", + "0 3146519.0 3.058910e+09 BK31 \n", + "1 1033560.0 1.012380e+09 MN12 \n", + "2 4124565.0 4.055200e+09 QN48 \n", + "3 1038490.0 1.013250e+09 MN19 \n", + "4 1056562.0 1.018750e+09 MN12 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wr.opensearch.search(\n", + " client,\n", + " index=\"nyc_restaurants_inspections_sample\",\n", + " size=5\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. 
Search\n", + "#### Search results are returned as a Pandas DataFrame" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.1 Search by DSL" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idbusiness_nameinspection_idbusiness_location.lonbusiness_location.lat
0ff2f50f6-5415-4706-9bcb-af7c5eb0afa3Soup House5794_20160907-122.48129937.747228
17ba4fb17-f9a9-49da-b90e-8b3553d6d97cSoup-or-Salad5794_20160907-122.48129937.747228
2b9e8f6a2-8fd1-4660-b041-2997a1a80984San Francisco Soup Company24936_20160609-122.40015237.793199
356b352e6-102b-4eff-8296-7e1fb2459babSoup Unlimited60354_20161123-122.40906137.783527
\n", + "
" + ], + "text/plain": [ + " _id business_name \\\n", + "0 ff2f50f6-5415-4706-9bcb-af7c5eb0afa3 Soup House \n", + "1 7ba4fb17-f9a9-49da-b90e-8b3553d6d97c Soup-or-Salad \n", + "2 b9e8f6a2-8fd1-4660-b041-2997a1a80984 San Francisco Soup Company \n", + "3 56b352e6-102b-4eff-8296-7e1fb2459bab Soup Unlimited \n", + "\n", + " inspection_id business_location.lon business_location.lat \n", + "0 5794_20160907 -122.481299 37.747228 \n", + "1 5794_20160907 -122.481299 37.747228 \n", + "2 24936_20160609 -122.400152 37.793199 \n", + "3 60354_20161123 -122.409061 37.783527 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# add a search query. search all soup businesses \n", + "wr.opensearch.search(\n", + " client,\n", + " index=\"sf_restaurants_inspections\",\n", + " _source=[\"inspection_id\", \"business_name\", \"business_location\"],\n", + " filter_path=[\"hits.hits._id\",\"hits.hits._source\"],\n", + " search_body={\n", + " \"query\": {\n", + " \"match\": {\n", + " \"business_name\": \"soup\"\n", + " }\n", + " }\n", + " }\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.2 Search by SQL" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_index_type_id_scorebusiness_nameinspection_score
0sf_restaurants_inspections_dedup_doc5794_20160907NoneSoup-or-Salad96
1sf_restaurants_inspections_dedup_doc60354_20161123NoneSoup Unlimited95
2sf_restaurants_inspections_dedup_doc24936_20160609NoneSan Francisco Soup Company77
\n", + "
" + ], + "text/plain": [ + " _index _type _id _score \\\n", + "0 sf_restaurants_inspections_dedup _doc 5794_20160907 None \n", + "1 sf_restaurants_inspections_dedup _doc 60354_20161123 None \n", + "2 sf_restaurants_inspections_dedup _doc 24936_20160609 None \n", + "\n", + " business_name inspection_score \n", + "0 Soup-or-Salad 96 \n", + "1 Soup Unlimited 95 \n", + "2 San Francisco Soup Company 77 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wr.opensearch.search_by_sql(\n", + " client,\n", + " sql_query=\"\"\"SELECT business_name, inspection_score \n", + " FROM sf_restaurants_inspections_dedup\n", + " WHERE business_name LIKE '%soup%'\n", + " ORDER BY inspection_score DESC LIMIT 5\"\"\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Delete Indices" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'acknowledged': True}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wr.opensearch.delete_index(\n", + " client=client,\n", + " index=\"sf_restaurants_inspections\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. 
Bonus - Prepare data and index from DataFrame" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For this exercise we'll use [DOHMH New York City Restaurant Inspection Results dataset](https://data.cityofnewyork.us/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/43nn-pn8j)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('https://data.cityofnewyork.us/api/views/43nn-pn8j/rows.csv?accessType=DOWNLOAD')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare the data for indexing" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# fields names underscore casing \n", + "df.columns = [col.lower().replace(' ', '_') for col in df.columns]\n", + "\n", + "# convert lon/lat to OpenSearch geo_point\n", + "df['business_location'] = \"POINT (\" + df.longitude.fillna('0').astype(str) + \" \" + df.latitude.fillna('0').astype(str) + \")\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create index with mapping" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'acknowledged': True,\n", + " 'shards_acknowledged': True,\n", + " 'index': 'nyc_restaurants_inspections'}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# delete index if exists\n", + "wr.opensearch.delete_index(\n", + " client=client,\n", + " index=\"nyc_restaurants\"\n", + " \n", + ")\n", + "\n", + "# use dynamic_template to map date fields\n", + "# define business_location as geo_point\n", + "wr.opensearch.create_index(\n", + " client=client,\n", + " index=\"nyc_restaurants_inspections\",\n", + " 
mappings={\n", + " \"dynamic_templates\" : [\n", + " {\n", + " \"dates\" : {\n", + " \"match\" : \"*date\",\n", + " \"mapping\" : {\n", + " \"type\" : \"date\",\n", + " \"format\" : 'MM/dd/yyyy'\n", + " }\n", + " }\n", + " }\n", + " ],\n", + " \"properties\": {\n", + " \"business_location\": {\n", + " \"type\": \"geo_point\"\n", + " }\n", + " }\n", + " } \n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "### Index dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Indexing: 100% (382655/382655)|##########################|Elapsed Time: 0:04:15" + ] + }, + { + "data": { + "text/plain": [ + "{'success': 382655, 'errors': []}" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wr.opensearch.index_df(\n", + " client,\n", + " df=df,\n", + " index=\"nyc_restaurants_inspections\",\n", + " id_keys=[\"camis\"],\n", + " bulk_size=1000\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Execute geo query\n", + "#### Sort restaurants by distance from Times-Square" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
camisdbaborobuildingstreetzipcodephonecuisine_descriptioninspection_dateaction...inspection_typelatitudelongitudecommunity_boardcouncil_districtcensus_tractbinbblntabusiness_location
041551304THE COUNTERManhattan7TIMES SQUARE10036.02129976801American12/22/2016Violations were cited in the following area(s)....Cycle Inspection / Initial Inspection40.755908-73.986681105.03.011300.01086069.01.009940e+09MN17POINT (-73.986680953809 40.755907817312)
150055665ANN INC CAFEManhattan7TIMES SQUARE10036.02125413287American12/11/2019Violations were cited in the following area(s)....Cycle Inspection / Initial Inspection40.755908-73.986681105.03.011300.01086069.01.009940e+09MN17POINT (-73.986680953809 40.755907817312)
250049552ERNST AND YOUNGManhattan5TIMES SQ10036.02127739994Coffee/Tea11/30/2018Violations were cited in the following area(s)....Cycle Inspection / Initial Inspection40.755702-73.987208105.03.011300.01024656.01.010130e+09MN17POINT (-73.987207980138 40.755702020307)
350014078RED LOBSTERManhattan5TIMES SQ10036.02127306706Seafood10/03/2017Violations were cited in the following area(s)....Cycle Inspection / Initial Inspection40.755702-73.987208105.03.011300.01024656.01.010130e+09MN17POINT (-73.987207980138 40.755702020307)
450015171NEW AMSTERDAM THEATERManhattan214WEST 42 STREET10036.02125825472American06/26/2018Violations were cited in the following area(s)....Cycle Inspection / Re-inspection40.756317-73.987652105.03.011300.01024660.01.010130e+09MN17POINT (-73.987651832547 40.756316895053)
..................................................................
9541552060PROSKAUER ROSEManhattan11TIMES SQUARE10036.02129695493American08/11/2017Violations were cited in the following area(s)....Administrative Miscellaneous / Initial Inspection40.756891-73.990023105.03.011300.01087978.01.010138e+09MN17POINT (-73.990023200823 40.756890780426)
9641242148GABBY O'HARA'SManhattan123WEST 39 STREET10018.02122788984Irish07/30/2019Violations were cited in the following area(s)....Cycle Inspection / Re-inspection40.753405-73.986602105.04.011300.01080611.01.008150e+09MN17POINT (-73.986602050292 40.753404587174)
9750095860THE TIMES EATERYManhattan6808 AVENUE10036.06463867787American02/28/2020Violations were cited in the following area(s)....Pre-permit (Operational) / Initial Inspection40.757991-73.989218105.03.011900.01024703.01.010150e+09MN17POINT (-73.989218092096 40.757991356019)
9850072861ITSUManhattan5307 AVENUE10018.09176393645Asian/Asian Fusion09/10/2018Violations were cited in the following area(s)....Pre-permit (Operational) / Initial Inspection40.753844-73.988551105.03.011300.01014485.01.007880e+09MN17POINT (-73.988551029682 40.753843959794)
9950068109LUKE'S LOBSTERManhattan1407BROADWAY10018.09174759192Seafood09/06/2017Violations were cited in the following area(s)....Pre-permit (Operational) / Initial Inspection40.753432-73.987151105.03.011300.01015265.01.008140e+09MN17POINT (-73.98715066791 40.753432097521)
\n", + "

100 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " camis dba boro building street \\\n", + "0 41551304 THE COUNTER Manhattan 7 TIMES SQUARE \n", + "1 50055665 ANN INC CAFE Manhattan 7 TIMES SQUARE \n", + "2 50049552 ERNST AND YOUNG Manhattan 5 TIMES SQ \n", + "3 50014078 RED LOBSTER Manhattan 5 TIMES SQ \n", + "4 50015171 NEW AMSTERDAM THEATER Manhattan 214 WEST 42 STREET \n", + ".. ... ... ... ... ... \n", + "95 41552060 PROSKAUER ROSE Manhattan 11 TIMES SQUARE \n", + "96 41242148 GABBY O'HARA'S Manhattan 123 WEST 39 STREET \n", + "97 50095860 THE TIMES EATERY Manhattan 680 8 AVENUE \n", + "98 50072861 ITSU Manhattan 530 7 AVENUE \n", + "99 50068109 LUKE'S LOBSTER Manhattan 1407 BROADWAY \n", + "\n", + " zipcode phone cuisine_description inspection_date \\\n", + "0 10036.0 2129976801 American 12/22/2016 \n", + "1 10036.0 2125413287 American 12/11/2019 \n", + "2 10036.0 2127739994 Coffee/Tea 11/30/2018 \n", + "3 10036.0 2127306706 Seafood 10/03/2017 \n", + "4 10036.0 2125825472 American 06/26/2018 \n", + ".. ... ... ... ... \n", + "95 10036.0 2129695493 American 08/11/2017 \n", + "96 10018.0 2122788984 Irish 07/30/2019 \n", + "97 10036.0 6463867787 American 02/28/2020 \n", + "98 10018.0 9176393645 Asian/Asian Fusion 09/10/2018 \n", + "99 10018.0 9174759192 Seafood 09/06/2017 \n", + "\n", + " action ... \\\n", + "0 Violations were cited in the following area(s). ... \n", + "1 Violations were cited in the following area(s). ... \n", + "2 Violations were cited in the following area(s). ... \n", + "3 Violations were cited in the following area(s). ... \n", + "4 Violations were cited in the following area(s). ... \n", + ".. ... ... \n", + "95 Violations were cited in the following area(s). ... \n", + "96 Violations were cited in the following area(s). ... \n", + "97 Violations were cited in the following area(s). ... \n", + "98 Violations were cited in the following area(s). ... \n", + "99 Violations were cited in the following area(s). ... 
\n", + "\n", + " inspection_type latitude longitude \\\n", + "0 Cycle Inspection / Initial Inspection 40.755908 -73.986681 \n", + "1 Cycle Inspection / Initial Inspection 40.755908 -73.986681 \n", + "2 Cycle Inspection / Initial Inspection 40.755702 -73.987208 \n", + "3 Cycle Inspection / Initial Inspection 40.755702 -73.987208 \n", + "4 Cycle Inspection / Re-inspection 40.756317 -73.987652 \n", + ".. ... ... ... \n", + "95 Administrative Miscellaneous / Initial Inspection 40.756891 -73.990023 \n", + "96 Cycle Inspection / Re-inspection 40.753405 -73.986602 \n", + "97 Pre-permit (Operational) / Initial Inspection 40.757991 -73.989218 \n", + "98 Pre-permit (Operational) / Initial Inspection 40.753844 -73.988551 \n", + "99 Pre-permit (Operational) / Initial Inspection 40.753432 -73.987151 \n", + "\n", + " community_board council_district census_tract bin bbl \\\n", + "0 105.0 3.0 11300.0 1086069.0 1.009940e+09 \n", + "1 105.0 3.0 11300.0 1086069.0 1.009940e+09 \n", + "2 105.0 3.0 11300.0 1024656.0 1.010130e+09 \n", + "3 105.0 3.0 11300.0 1024656.0 1.010130e+09 \n", + "4 105.0 3.0 11300.0 1024660.0 1.010130e+09 \n", + ".. ... ... ... ... ... \n", + "95 105.0 3.0 11300.0 1087978.0 1.010138e+09 \n", + "96 105.0 4.0 11300.0 1080611.0 1.008150e+09 \n", + "97 105.0 3.0 11900.0 1024703.0 1.010150e+09 \n", + "98 105.0 3.0 11300.0 1014485.0 1.007880e+09 \n", + "99 105.0 3.0 11300.0 1015265.0 1.008140e+09 \n", + "\n", + " nta business_location \n", + "0 MN17 POINT (-73.986680953809 40.755907817312) \n", + "1 MN17 POINT (-73.986680953809 40.755907817312) \n", + "2 MN17 POINT (-73.987207980138 40.755702020307) \n", + "3 MN17 POINT (-73.987207980138 40.755702020307) \n", + "4 MN17 POINT (-73.987651832547 40.756316895053) \n", + ".. ... ... 
\n", + "95 MN17 POINT (-73.990023200823 40.756890780426) \n", + "96 MN17 POINT (-73.986602050292 40.753404587174) \n", + "97 MN17 POINT (-73.989218092096 40.757991356019) \n", + "98 MN17 POINT (-73.988551029682 40.753843959794) \n", + "99 MN17 POINT (-73.98715066791 40.753432097521) \n", + "\n", + "[100 rows x 27 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wr.opensearch.search(\n", + " client,\n", + " index=\"nyc_restaurants_inspections\",\n", + " filter_path=[\"hits.hits._source\"],\n", + " size=100,\n", + " search_body={\n", + " \"query\": {\n", + " \"match_all\": {}\n", + " },\n", + " \"sort\": [\n", + " {\n", + " \"_geo_distance\": {\n", + " \"business_location\": { # Times-Square - https://geojson.io/#map=16/40.7563/-73.9862\n", + " \"lat\": 40.75613228383523,\n", + " \"lon\": -73.9865791797638\n", + " },\n", + " \"order\": \"asc\"\n", + " }\n", + " }\n", + " ]\n", + " }\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}