diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index d3420ade6..e898ec21e 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -215,6 +215,10 @@ or
``./deploy-base.sh``
``./deploy-databases.sh``
+* [OPTIONAL] Deploy the CloudFormation template `opensearch.yaml` (only needed if you want to test Amazon OpenSearch Service). This step can take about 15 minutes to complete.
+
+``./deploy-opensearch.sh``
+
* Go to the `EC2 -> SecurityGroups` console, open the `aws-data-wrangler-*` security group and configure it to accept connections from your IP on any TCP port.
- Alternatively run:
@@ -244,7 +248,7 @@ or
``pytest -n 8 tests/test_db.py``
-* To run all data lake test functions for all python versions (Only if Amazon QuickSight is activated):
+* To run all data lake test functions for all Python versions (Only if Amazon QuickSight is activated and the Amazon OpenSearch template is deployed):
``./test.sh``
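+
+* To run only the Amazon OpenSearch test functions (hypothetical path, assuming the new tests follow the existing `tests/test_*.py` convention):
+
+``pytest -n 8 tests/test_opensearch.py``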
diff --git a/README.md b/README.md
index 16ab96390..bed91146f 100644
--- a/README.md
+++ b/README.md
@@ -136,6 +136,7 @@ FROM "sampleDB"."sampleTable" ORDER BY time DESC LIMIT 3
- [026 - Amazon Timestream](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/026%20-%20Amazon%20Timestream.ipynb)
- [027 - Amazon Timestream 2](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/027%20-%20Amazon%20Timestream%202.ipynb)
- [028 - Amazon DynamoDB](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/028%20-%20DynamoDB.ipynb)
+ - [031 - OpenSearch](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/031%20-%20OpenSearch.ipynb)
- [**API Reference**](https://aws-data-wrangler.readthedocs.io/en/2.11.0/api.html)
- [Amazon S3](https://aws-data-wrangler.readthedocs.io/en/2.11.0/api.html#amazon-s3)
- [AWS Glue Catalog](https://aws-data-wrangler.readthedocs.io/en/2.11.0/api.html#aws-glue-catalog)
diff --git a/awswrangler/__init__.py b/awswrangler/__init__.py
index ee068e4f6..c87d36823 100644
--- a/awswrangler/__init__.py
+++ b/awswrangler/__init__.py
@@ -17,6 +17,7 @@
emr,
exceptions,
mysql,
+ opensearch,
postgresql,
quicksight,
redshift,
@@ -38,6 +39,7 @@
"data_api",
"dynamodb",
"exceptions",
+ "opensearch",
"quicksight",
"s3",
"sts",
diff --git a/awswrangler/opensearch/__init__.py b/awswrangler/opensearch/__init__.py
new file mode 100644
index 000000000..205e70b59
--- /dev/null
+++ b/awswrangler/opensearch/__init__.py
@@ -0,0 +1,17 @@
+"""Utilities Module for Amazon OpenSearch."""
+
+from awswrangler.opensearch._read import search, search_by_sql
+from awswrangler.opensearch._utils import connect
+from awswrangler.opensearch._write import create_index, delete_index, index_csv, index_df, index_documents, index_json
+
+__all__ = [
+ "connect",
+ "create_index",
+ "delete_index",
+ "index_csv",
+ "index_documents",
+ "index_df",
+ "index_json",
+ "search",
+ "search_by_sql",
+]
diff --git a/awswrangler/opensearch/_read.py b/awswrangler/opensearch/_read.py
new file mode 100644
index 000000000..8f2ef95c1
--- /dev/null
+++ b/awswrangler/opensearch/_read.py
@@ -0,0 +1,169 @@
+"""Amazon OpenSearch Read Module (PRIVATE)."""
+
+from typing import Any, Collection, Dict, List, Mapping, Optional, Union
+
+import pandas as pd
+from opensearchpy import OpenSearch
+from opensearchpy.helpers import scan
+
+from awswrangler.opensearch._utils import _get_distribution
+
+
+def _resolve_fields(row: Mapping[str, Any]) -> Mapping[str, Any]:
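+    # flatten nested objects into dot-notation keys, e.g. {"a": {"b": 1}} -> {"a.b": 1}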
+ fields = {}
+ for field in row:
+ if isinstance(row[field], dict):
+ nested_fields = _resolve_fields(row[field])
+ for n_field, val in nested_fields.items():
+ fields[f"{field}.{n_field}"] = val
+ else:
+ fields[field] = row[field]
+ return fields
+
+
+def _hit_to_row(hit: Mapping[str, Any]) -> Mapping[str, Any]:
+ row: Dict[str, Any] = {}
+ for k in hit.keys():
+ if k == "_source":
+ solved_fields = _resolve_fields(hit["_source"])
+ row.update(solved_fields)
+ elif k.startswith("_"):
+ row[k] = hit[k]
+ return row
+
+
+def _search_response_to_documents(response: Mapping[str, Any]) -> List[Mapping[str, Any]]:
+ return [_hit_to_row(hit) for hit in response["hits"]["hits"]]
+
+
+def _search_response_to_df(response: Union[Mapping[str, Any], Any]) -> pd.DataFrame:
+ return pd.DataFrame(_search_response_to_documents(response))
+
+
+def search(
+ client: OpenSearch,
+ index: Optional[str] = "_all",
+ search_body: Optional[Dict[str, Any]] = None,
+ doc_type: Optional[str] = None,
+ is_scroll: Optional[bool] = False,
+ filter_path: Optional[Union[str, Collection[str]]] = None,
+ **kwargs: Any,
+) -> pd.DataFrame:
+ """Return results matching query DSL as pandas dataframe.
+
+ Parameters
+ ----------
+ client : OpenSearch
+ instance of opensearchpy.OpenSearch to use.
+ index : str, optional
+ A comma-separated list of index names to search.
+        Use `_all` or an empty string to perform the operation on all indices.
+ search_body : Dict[str, Any], optional
+ The search definition using the [Query DSL](https://opensearch.org/docs/opensearch/query-dsl/full-text/).
+ doc_type : str, optional
+ Name of the document type (for Elasticsearch versions 5.x and earlier).
+ is_scroll : bool, optional
+        Allows retrieving a large number of results from a single search request using
+ [scroll](https://opensearch.org/docs/opensearch/rest-api/scroll/)
+ for example, for machine learning jobs.
+ Because scroll search contexts consume a lot of memory, we suggest you don’t use the scroll operation
+ for frequent user queries.
+ filter_path : Union[str, Collection[str]], optional
+ Use the filter_path parameter to reduce the size of the OpenSearch Service response \
+(default: ['hits.hits._id','hits.hits._source'])
+ **kwargs :
+ KEYWORD arguments forwarded to [opensearchpy.OpenSearch.search]\
+(https://opensearch-py.readthedocs.io/en/latest/api.html#opensearchpy.OpenSearch.search)
+ and also to [opensearchpy.helpers.scan](https://opensearch-py.readthedocs.io/en/master/helpers.html#scan)
+ if `is_scroll=True`
+
+ Returns
+ -------
+    pandas.DataFrame
+ Results as Pandas DataFrame
+
+ Examples
+ --------
+ Searching an index using query DSL
+
+ >>> import awswrangler as wr
+ >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')
+ >>> df = wr.opensearch.search(
+ ... client=client,
+ ... index='movies',
+ ... search_body={
+ ... "query": {
+ ... "match": {
+ ... "title": "wind"
+ ... }
+ ... }
+ ... }
+ ... )
+
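+    Searching an index using scroll (a sketch: scroll retrieves the full result set,
+    so prefer it for large exports rather than interactive queries)
+
+    >>> df = wr.opensearch.search(
+    ...     client=client,
+    ...     index='movies',
+    ...     search_body={"query": {"match_all": {}}},
+    ...     is_scroll=True
+    ... )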
+
+ """
+ if doc_type:
+ kwargs["doc_type"] = doc_type
+
+ if filter_path is None:
+ filter_path = ["hits.hits._id", "hits.hits._source"]
+
+ if is_scroll:
+ if isinstance(filter_path, str):
+ filter_path = [filter_path]
+ filter_path = ["_scroll_id", "_shards"] + list(filter_path) # required for scroll
+ documents_generator = scan(client, index=index, query=search_body, filter_path=filter_path, **kwargs)
+ documents = [_hit_to_row(doc) for doc in documents_generator]
+ df = pd.DataFrame(documents)
+ else:
+ response = client.search(index=index, body=search_body, filter_path=filter_path, **kwargs)
+ df = _search_response_to_df(response)
+ return df
+
+
+def search_by_sql(client: OpenSearch, sql_query: str, **kwargs: Any) -> pd.DataFrame:
+ """Return results matching [SQL query](https://opensearch.org/docs/search-plugins/sql/index/) as pandas dataframe.
+
+ Parameters
+ ----------
+ client : OpenSearch
+ instance of opensearchpy.OpenSearch to use.
+ sql_query : str
+ SQL query
+ **kwargs :
+        KEYWORD arguments forwarded as request URL parameters (e.g. filter_path)
+
+ Returns
+ -------
+    pandas.DataFrame
+ Results as Pandas DataFrame
+
+ Examples
+ --------
+ Searching an index using SQL query
+
+ >>> import awswrangler as wr
+ >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')
+    >>> df = wr.opensearch.search_by_sql(
+    ...     client=client,
+    ...     sql_query='SELECT * FROM my-index LIMIT 50'
+    ... )
+
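+    Passing `fetch_size` (translated into the SQL plugin's `fetch_size` request parameter)
+
+    >>> df = wr.opensearch.search_by_sql(
+    ...     client=client,
+    ...     sql_query='SELECT * FROM my-index',
+    ...     fetch_size=1000
+    ... )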
+
+ """
+ if _get_distribution(client) == "opensearch":
+ url = "/_plugins/_sql"
+ else:
+ url = "/_opendistro/_sql"
+
+ kwargs["format"] = "json"
+ body = {"query": sql_query}
+ for size_att in ["size", "fetch_size"]:
+ if size_att in kwargs:
+ body["fetch_size"] = kwargs[size_att]
+ del kwargs[size_att] # unrecognized parameter
+ response = client.transport.perform_request(
+ "POST", url, headers={"Content-Type": "application/json"}, body=body, params=kwargs
+ )
+ df = _search_response_to_df(response)
+ return df
diff --git a/awswrangler/opensearch/_utils.py b/awswrangler/opensearch/_utils.py
new file mode 100644
index 000000000..a48b0eadc
--- /dev/null
+++ b/awswrangler/opensearch/_utils.py
@@ -0,0 +1,108 @@
+"""Amazon OpenSearch Utils Module (PRIVATE)."""
+
+import logging
+import re
+from typing import Any, Optional
+
+import boto3
+from opensearchpy import OpenSearch, RequestsHttpConnection
+from requests_aws4auth import AWS4Auth
+
+from awswrangler import _utils, exceptions
+
+_logger: logging.Logger = logging.getLogger(__name__)
+
+
+def _get_distribution(client: OpenSearch) -> Any:
+ return client.info().get("version", {}).get("distribution", "elasticsearch")
+
+
+def _get_version(client: OpenSearch) -> Any:
+ return client.info().get("version", {}).get("number")
+
+
+def _get_version_major(client: OpenSearch) -> Any:
+ version = _get_version(client)
+ if version:
+ return int(version.split(".")[0])
+ return None
+
+
+def _strip_endpoint(endpoint: str) -> str:
+ uri_schema = re.compile(r"https?://")
+ return uri_schema.sub("", endpoint).strip().strip("/")
+
+
+def connect(
+ host: str,
+ port: Optional[int] = 443,
+ boto3_session: Optional[boto3.Session] = boto3.Session(),
+ region: Optional[str] = None,
+ username: Optional[str] = None,
+ password: Optional[str] = None,
+) -> OpenSearch:
+ """Create a secure connection to the specified Amazon OpenSearch domain.
+
+ Note
+ ----
+    We use [opensearch-py](https://github.com/opensearch-project/opensearch-py), an OpenSearch low-level Python client.
+
+    The username and password are mandatory if the OpenSearch cluster uses [Fine-Grained Access Control]\
+(https://docs.aws.amazon.com/opensearch-service/latest/developerguide/fgac.html).
+    If fine-grained access control is disabled, the session's access key and secret key are used instead.
+
+ Parameters
+ ----------
+ host : str
+ Amazon OpenSearch domain, for example: my-test-domain.us-east-1.es.amazonaws.com.
+ port : int
+        OpenSearch Service only accepts connections over port 80 (HTTP) or 443 (HTTPS).
+    boto3_session : boto3.Session(), optional
+        Boto3 Session. The default boto3 Session will be used if boto3_session receives None.
+    region : str, optional
+        AWS region of the Amazon OpenSearch domain. If not provided, it will be extracted from boto3_session.
+    username : str, optional
+        Fine-grained access control username. Mandatory if the OpenSearch cluster uses fine-grained access control.
+    password : str, optional
+        Fine-grained access control password. Mandatory if the OpenSearch cluster uses fine-grained access control.
+
+ Returns
+ -------
+ opensearchpy.OpenSearch
+ OpenSearch low-level client.
+ https://github.com/opensearch-project/opensearch-py/blob/main/opensearchpy/client/__init__.py
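+
+    Examples
+    --------
+    Creating a connection with the default boto3 session credentials
+    (replace DOMAIN-ENDPOINT with your Amazon OpenSearch domain endpoint)
+
+    >>> import awswrangler as wr
+    >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')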
+ """
+ valid_ports = {80, 443}
+
+ if port not in valid_ports:
+        raise ValueError(f"port must be one of {valid_ports}")
+
+ if username and password:
+ http_auth = (username, password)
+ else:
+ if region is None:
+ region = _utils.get_region_from_session(boto3_session=boto3_session)
+ creds = _utils.get_credentials_from_session(boto3_session=boto3_session)
+ if creds.access_key is None or creds.secret_key is None:
+ raise exceptions.InvalidArgument(
+ "One of IAM Role or AWS ACCESS_KEY_ID and SECRET_ACCESS_KEY must be "
+ "given. Unable to find ACCESS_KEY_ID and SECRET_ACCESS_KEY in boto3 "
+ "session."
+ )
+ http_auth = AWS4Auth(creds.access_key, creds.secret_key, region, "es", session_token=creds.token)
+ try:
+ es = OpenSearch(
+ host=_strip_endpoint(host),
+ port=port,
+ http_auth=http_auth,
+ use_ssl=True,
+ verify_certs=True,
+ connection_class=RequestsHttpConnection,
+ timeout=30,
+ max_retries=10,
+ retry_on_timeout=True,
+ )
+ except Exception as e:
+        _logger.error("Error connecting to the OpenSearch cluster. Please verify authentication details.")
+ raise e
+ return es
diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py
new file mode 100644
index 000000000..62e9d146e
--- /dev/null
+++ b/awswrangler/opensearch/_write.py
@@ -0,0 +1,573 @@
+"""Amazon OpenSearch Write Module (PRIVATE)."""
+
+import ast
+import json
+import logging
+import uuid
+from typing import Any, Dict, Generator, Iterable, List, Mapping, Optional, Tuple, Union
+
+import boto3
+import pandas as pd
+import progressbar
+from jsonpath_ng import parse
+from jsonpath_ng.exceptions import JsonPathParserError
+from opensearchpy import OpenSearch, TransportError
+from opensearchpy.exceptions import NotFoundError
+from opensearchpy.helpers import bulk
+from pandas import notna
+
+from awswrangler._utils import parse_path
+from awswrangler.opensearch._utils import _get_distribution, _get_version_major
+
+_logger: logging.Logger = logging.getLogger(__name__)
+_logger.setLevel(logging.DEBUG)
+
+_DEFAULT_REFRESH_INTERVAL = "1s"
+
+
+def _selected_keys(document: Mapping[str, Any], keys_to_write: Optional[List[str]]) -> Mapping[str, Any]:
+ if keys_to_write is None:
+ keys_to_write = list(document.keys())
+ keys_to_write = list(filter(lambda x: x != "_id", keys_to_write))
+ return {key: document[key] for key in keys_to_write}
+
+
+def _actions_generator(
+ documents: Union[Iterable[Dict[str, Any]], Iterable[Mapping[str, Any]]],
+ index: str,
+ doc_type: Optional[str],
+ keys_to_write: Optional[List[str]],
+ id_keys: Optional[List[str]],
+ bulk_size: int = 10000,
+) -> Generator[List[Dict[str, Any]], None, None]:
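+    # accumulate bulk index actions and yield them in chunks of `bulk_size` documents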
+ bulk_chunk_documents = []
+ for i, document in enumerate(documents):
+ if id_keys:
+ _id = "-".join([str(document[id_key]) for id_key in id_keys])
+ else:
+ _id = document.get("_id", uuid.uuid4())
+ bulk_chunk_documents.append(
+ {
+ "_index": index,
+ "_type": doc_type,
+ "_id": _id,
+ "_source": _selected_keys(document, keys_to_write),
+ }
+ )
+ if (i + 1) % bulk_size == 0:
+ yield bulk_chunk_documents
+ bulk_chunk_documents = []
+ if len(bulk_chunk_documents) > 0:
+ yield bulk_chunk_documents
+
+
+def _df_doc_generator(df: pd.DataFrame) -> Generator[Dict[str, Any], None, None]:
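+    # yield one document per DataFrame row, dropping NaN values and
+    # deserializing cells that contain stringified JSON objects/arrays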
+ def _deserialize(v: Any) -> Any:
+ if isinstance(v, str):
+ v = v.strip()
+ if v.startswith("{") and v.endswith("}") or v.startswith("[") and v.endswith("]"):
+ try:
+ v = json.loads(v)
+ except json.decoder.JSONDecodeError:
+ try:
+ v = ast.literal_eval(v) # if properties are enclosed with single quotes
+ if not isinstance(v, dict):
+ _logger.warning("could not convert string to json: %s", v)
+ except SyntaxError as e:
+ _logger.warning("could not convert string to json: %s", v)
+ _logger.warning(e)
+ return v
+
+ df_iter = df.iterrows()
+ for _, document in df_iter:
+ yield {k: _deserialize(v) for k, v in document.items() if notna(v)}
+
+
+def _file_line_generator(path: str, is_json: bool = False) -> Generator[Any, None, None]:
+ with open(path) as fp: # pylint: disable=W1514
+ for line in fp:
+ if is_json:
+ yield json.loads(line)
+ else:
+ yield line.strip()
+
+
+def _get_documents_w_json_path(documents: List[Mapping[str, Any]], json_path: str) -> List[Any]:
+ try:
+ jsonpath_expression = parse(json_path)
+ except JsonPathParserError as e:
+ _logger.error("invalid json_path: %s", json_path)
+ raise e
+ output_documents = []
+ for doc in documents:
+ for match in jsonpath_expression.find(doc):
+ match_value = match.value
+ if isinstance(match_value, list):
+ output_documents += match_value
+ elif isinstance(match_value, dict):
+ output_documents.append(match_value)
+ else:
+ msg = f"expected json_path value to be a list/dict. received type {type(match_value)} ({match_value})"
+ raise ValueError(msg)
+ return output_documents
+
+
+def _get_refresh_interval(client: OpenSearch, index: str) -> Any:
+ url = f"/{index}/_settings"
+ try:
+ response = client.transport.perform_request("GET", url)
+ index_settings = response.get(index, {}).get("index", {}) # type: ignore
+ refresh_interval = index_settings.get("refresh_interval", _DEFAULT_REFRESH_INTERVAL)
+ return refresh_interval
+ except NotFoundError:
+ return None
+
+
+def _set_refresh_interval(client: OpenSearch, index: str, refresh_interval: Optional[Any]) -> Any:
+ url = f"/{index}/_settings"
+ body = {"index": {"refresh_interval": refresh_interval}}
+ response = client.transport.perform_request("PUT", url, headers={"Content-Type": "application/json"}, body=body)
+
+ return response
+
+
+def _disable_refresh_interval(
+ client: OpenSearch,
+ index: str,
+) -> Any:
+ return _set_refresh_interval(client=client, index=index, refresh_interval="-1")
+
+
+def create_index(
+ client: OpenSearch,
+ index: str,
+ doc_type: Optional[str] = None,
+ settings: Optional[Dict[str, Any]] = None,
+ mappings: Optional[Dict[str, Any]] = None,
+) -> Dict[str, Any]:
+ """Create an index.
+
+ Parameters
+ ----------
+ client : OpenSearch
+ instance of opensearchpy.OpenSearch to use.
+ index : str
+ Name of the index.
+ doc_type : str, optional
+ Name of the document type (for Elasticsearch versions 5.x and earlier).
+ settings : Dict[str, Any], optional
+ Index settings
+ https://opensearch.org/docs/opensearch/rest-api/create-index/#index-settings
+ mappings : Dict[str, Any], optional
+ Index mappings
+ https://opensearch.org/docs/opensearch/rest-api/create-index/#mappings
+
+ Returns
+ -------
+ Dict[str, Any]
+ OpenSearch rest api response
+ https://opensearch.org/docs/opensearch/rest-api/create-index/#response.
+
+ Examples
+ --------
+ Creating an index.
+
+ >>> import awswrangler as wr
+ >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')
+ >>> response = wr.opensearch.create_index(
+ ... client=client,
+ ... index="sample-index1",
+ ... mappings={
+ ... "properties": {
+ ... "age": { "type" : "integer" }
+ ... }
+ ... },
+ ... settings={
+ ... "index": {
+ ... "number_of_shards": 2,
+ ... "number_of_replicas": 1
+ ... }
+ ... }
+ ... )
+
+ """
+ body = {}
+ if mappings:
+ if _get_distribution(client) == "opensearch" or _get_version_major(client) >= 7:
+ body["mappings"] = mappings # doc type deprecated
+ else:
+ if doc_type:
+ body["mappings"] = {doc_type: mappings}
+ else:
+ body["mappings"] = {index: mappings}
+ if settings:
+ body["settings"] = settings
+ if body == {}:
+ body = None # type: ignore
+
+ # ignore 400 cause by IndexAlreadyExistsException when creating an index
+ response: Dict[str, Any] = client.indices.create(index, body=body, ignore=400)
+ if "error" in response:
+ _logger.warning(response)
+ if str(response["error"]).startswith("MapperParsingException"):
+ raise ValueError(response["error"])
+ return response
+
+
+def delete_index(client: OpenSearch, index: str) -> Dict[str, Any]:
+ """Create an index.
+
+ Parameters
+ ----------
+ client : OpenSearch
+ instance of opensearchpy.OpenSearch to use.
+ index : str
+ Name of the index.
+
+ Returns
+ -------
+ Dict[str, Any]
+ OpenSearch rest api response
+
+ Examples
+ --------
+    Deleting an index.
+
+ >>> import awswrangler as wr
+ >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')
+ >>> response = wr.opensearch.delete_index(
+ ... client=client,
+ ... index="sample-index1"
+ ... )
+
+ """
+ # ignore 400/404 IndexNotFoundError exception
+ response: Dict[str, Any] = client.indices.delete(index, ignore=[400, 404])
+ if "error" in response:
+ _logger.warning(response)
+ return response
+
+
+def index_json(
+ client: OpenSearch,
+ path: str,
+ index: str,
+ doc_type: Optional[str] = None,
+ boto3_session: Optional[boto3.Session] = boto3.Session(),
+ json_path: Optional[str] = None,
+ **kwargs: Any,
+) -> Dict[str, Any]:
+ """Index all documents from JSON file to OpenSearch index.
+
+ The JSON file should be in a JSON-Lines text format (newline-delimited JSON) - https://jsonlines.org/
+    Or, if it is a single large JSON document, please provide `json_path`.
+
+ Parameters
+ ----------
+ client : OpenSearch
+ instance of opensearchpy.OpenSearch to use.
+ path : str
+ s3 or local path to the JSON file which contains the documents.
+ index : str
+ Name of the index.
+ doc_type : str, optional
+ Name of the document type (for Elasticsearch versions 5.x and earlier).
+ json_path : str, optional
+ JsonPath expression to specify explicit path to a single name element
+ in a JSON hierarchical data structure.
+ Read more about [JsonPath](https://jsonpath.com)
+ boto3_session : boto3.Session(), optional
+ Boto3 Session to be used to access s3 if s3 path is provided.
+        The default boto3 Session will be used if boto3_session receives None.
+ **kwargs :
+ KEYWORD arguments forwarded to :func:`~awswrangler.opensearch.index_documents`
+ which is used to execute the operation
+
+ Returns
+ -------
+ Dict[str, Any]
+ Response payload
+ https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#response.
+
+ Examples
+ --------
+ Writing contents of JSON file
+
+ >>> import awswrangler as wr
+ >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')
+ >>> wr.opensearch.index_json(
+ ... client=client,
+ ... path='docs.json',
+ ... index='sample-index1'
+ ... )
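+
+    Writing a single large JSON document using a JsonPath expression
+    (a sketch; `records[*]` is a hypothetical path, adjust it to your document structure)
+
+    >>> wr.opensearch.index_json(
+    ...     client=client,
+    ...     path='docs.json',
+    ...     json_path='records[*]',
+    ...     index='sample-index1'
+    ... )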
+ """
+ _logger.debug("indexing %s from %s", index, path)
+
+ if boto3_session is None:
+ raise ValueError("boto3_session cannot be None")
+
+ if path.startswith("s3://"):
+ bucket, key = parse_path(path)
+ s3 = boto3_session.client("s3")
+ obj = s3.get_object(Bucket=bucket, Key=key)
+ body = obj["Body"].read()
+ lines = body.splitlines()
+ documents = [json.loads(line) for line in lines]
+ if json_path:
+ documents = _get_documents_w_json_path(documents, json_path)
+ else: # local path
+ documents = list(_file_line_generator(path, is_json=True))
+ if json_path:
+ documents = _get_documents_w_json_path(documents, json_path)
+ return index_documents(client=client, documents=documents, index=index, doc_type=doc_type, **kwargs)
+
+
+def index_csv(
+ client: OpenSearch,
+ path: str,
+ index: str,
+ doc_type: Optional[str] = None,
+ pandas_kwargs: Optional[Dict[str, Any]] = None,
+ **kwargs: Any,
+) -> Dict[str, Any]:
+ """Index all documents from a CSV file to OpenSearch index.
+
+ Parameters
+ ----------
+ client : OpenSearch
+ instance of opensearchpy.OpenSearch to use.
+ path : str
+ s3 or local path to the CSV file which contains the documents.
+ index : str
+ Name of the index.
+ doc_type : str, optional
+ Name of the document type (for Elasticsearch versions 5.x and earlier).
+ pandas_kwargs : Dict[str, Any], optional
+ Dictionary of arguments forwarded to pandas.read_csv().
+ e.g. pandas_kwargs={'sep': '|', 'na_values': ['null', 'none']}
+ https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
+ Note: these params values are enforced: `skip_blank_lines=True`
+ **kwargs :
+ KEYWORD arguments forwarded to :func:`~awswrangler.opensearch.index_documents`
+ which is used to execute the operation
+
+ Returns
+ -------
+ Dict[str, Any]
+ Response payload
+ https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#response.
+
+ Examples
+ --------
+ Writing contents of CSV file
+
+ >>> import awswrangler as wr
+ >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')
+ >>> wr.opensearch.index_csv(
+ ... client=client,
+ ... path='docs.csv',
+ ... index='sample-index1'
+ ... )
+
+ Writing contents of CSV file using pandas_kwargs
+
+ >>> import awswrangler as wr
+ >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')
+ >>> wr.opensearch.index_csv(
+ ... client=client,
+ ... path='docs.csv',
+ ... index='sample-index1',
+ ... pandas_kwargs={'sep': '|', 'na_values': ['null', 'none']}
+ ... )
+ """
+ _logger.debug("indexing %s from %s", index, path)
+ if pandas_kwargs is None:
+ pandas_kwargs = {}
+ enforced_pandas_params = {
+ "skip_blank_lines": True,
+ # 'na_filter': True # will generate Nan value for empty cells. We remove Nan keys in _df_doc_generator
+ # Note: if the user will pass na_filter=False null fields will be indexed as well ({"k1": null, "k2": null})
+ }
+ pandas_kwargs.update(enforced_pandas_params)
+ df = pd.read_csv(path, **pandas_kwargs)
+ return index_df(client, df=df, index=index, doc_type=doc_type, **kwargs)
+
+
+def index_df(
+ client: OpenSearch, df: pd.DataFrame, index: str, doc_type: Optional[str] = None, **kwargs: Any
+) -> Dict[str, Any]:
+ """Index all documents from a DataFrame to OpenSearch index.
+
+ Parameters
+ ----------
+ client : OpenSearch
+ instance of opensearchpy.OpenSearch to use.
+ df : pd.DataFrame
+ Pandas DataFrame https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
+ index : str
+ Name of the index.
+ doc_type : str, optional
+ Name of the document type (for Elasticsearch versions 5.x and earlier).
+ **kwargs :
+ KEYWORD arguments forwarded to :func:`~awswrangler.opensearch.index_documents`
+ which is used to execute the operation
+
+ Returns
+ -------
+ Dict[str, Any]
+ Response payload
+ https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#response.
+
+ Examples
+ --------
+ Writing rows of DataFrame
+
+ >>> import awswrangler as wr
+ >>> import pandas as pd
+ >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')
+ >>> wr.opensearch.index_df(
+ ... client=client,
+ ... df=pd.DataFrame([{'_id': '1'}, {'_id': '2'}, {'_id': '3'}]),
+ ... index='sample-index1'
+ ... )
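+
+    Writing rows of DataFrame with a compound document id built from columns
+    (a sketch; `id_keys` is forwarded to :func:`~awswrangler.opensearch.index_documents`)
+
+    >>> wr.opensearch.index_df(
+    ...     client=client,
+    ...     df=pd.DataFrame([{'year': 2021, 'title': 'wind'}]),
+    ...     index='sample-index2',
+    ...     id_keys=['year', 'title']
+    ... )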
+ """
+ return index_documents(client=client, documents=_df_doc_generator(df), index=index, doc_type=doc_type, **kwargs)
+
+
+def index_documents(
+ client: OpenSearch,
+ documents: Iterable[Mapping[str, Any]],
+ index: str,
+ doc_type: Optional[str] = None,
+ keys_to_write: Optional[List[str]] = None,
+ id_keys: Optional[List[str]] = None,
+ ignore_status: Optional[Union[List[Any], Tuple[Any]]] = None,
+ bulk_size: int = 1000,
+ chunk_size: Optional[int] = 500,
+ max_chunk_bytes: Optional[int] = 100 * 1024 * 1024,
+ max_retries: Optional[int] = 5,
+ initial_backoff: Optional[int] = 2,
+ max_backoff: Optional[int] = 600,
+ **kwargs: Any,
+) -> Dict[str, Any]:
+ """Index all documents to OpenSearch index.
+
+ Note
+ ----
+ Some of the args are referenced from opensearch-py client library (bulk helpers)
+ https://opensearch-py.readthedocs.io/en/latest/helpers.html#opensearchpy.helpers.bulk
+ https://opensearch-py.readthedocs.io/en/latest/helpers.html#opensearchpy.helpers.streaming_bulk
+
+    If you receive `Error 429 (Too Many Requests) /_bulk`, please try to decrease the `bulk_size` value.
+ Please also consider modifying the cluster size and instance type -
+ Read more here: https://aws.amazon.com/premiumsupport/knowledge-center/resolve-429-error-es/
+
+ Parameters
+ ----------
+ client : OpenSearch
+ instance of opensearchpy.OpenSearch to use.
+ documents : Iterable[Mapping[str, Any]]
+ List which contains the documents that will be inserted.
+ index : str
+ Name of the index.
+ doc_type : str, optional
+ Name of the document type (for Elasticsearch versions 5.x and earlier).
+ keys_to_write : List[str], optional
+ list of keys to index. If not provided all keys will be indexed
+ id_keys : List[str], optional
+        list of keys that compose the document's unique id. If not provided, the `_id` key is used if it exists;
+        otherwise a unique identifier is generated for each document.
+ ignore_status: Union[List[Any], Tuple[Any]], optional
+ list of HTTP status codes that you want to ignore (not raising an exception)
+ bulk_size: int,
+ number of docs in each _bulk request (default: 1000)
+ chunk_size : int, optional
+ number of docs in one chunk sent to es (default: 500)
+ max_chunk_bytes: int, optional
+ the maximum size of the request in bytes (default: 100MB)
+ max_retries : int, optional
+        maximum number of times a document will be retried when
+        ``429`` is received; set to 0 for no retries on ``429`` (default: 5)
+ initial_backoff : int, optional
+ number of seconds we should wait before the first retry.
+ Any subsequent retries will be powers of ``initial_backoff*2**retry_number`` (default: 2)
+ max_backoff: int, optional
+ maximum number of seconds a retry will wait (default: 600)
+ **kwargs :
+ KEYWORD arguments forwarded to bulk operation
+ elasticsearch >= 7.10.2 / opensearch: \
+https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#url-parameters
+ elasticsearch < 7.10.2: \
+https://opendistro.github.io/for-elasticsearch-docs/docs/elasticsearch/rest-api-reference/#url-parameters
+
+ Returns
+ -------
+ Dict[str, Any]
+ Response payload
+ https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#response.
+
+ Examples
+ --------
+ Writing documents
+
+ >>> import awswrangler as wr
+ >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')
+    >>> wr.opensearch.index_documents(
+    ...     client=client,
+    ...     documents=[{'_id': '1', 'value': 'foo'}, {'_id': '2', 'value': 'bar'}],
+    ...     index='sample-index1'
+    ... )
+ """
+ if not isinstance(documents, list):
+ documents = list(documents)
+ total_documents = len(documents)
+ _logger.debug("indexing %s documents into %s", total_documents, index)
+
+ actions = _actions_generator(
+ documents, index, doc_type, keys_to_write=keys_to_write, id_keys=id_keys, bulk_size=bulk_size
+ )
+
+ success = 0
+ errors: List[Any] = []
+ refresh_interval = None
+ try:
+ widgets = [
+ progressbar.Percentage(),
+ progressbar.SimpleProgress(format=" (%(value_s)s/%(max_value_s)s)"),
+ progressbar.Bar(),
+ progressbar.Timer(),
+ ]
+ progress_bar = progressbar.ProgressBar(widgets=widgets, max_value=total_documents, prefix="Indexing: ").start()
+ for i, bulk_chunk_documents in enumerate(actions):
+ if i == 1: # second bulk iteration, in case the index didn't exist before
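+                # disabling the index refresh speeds up bulk indexing;
+                # the original value is restored in the `finally` block below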
+ refresh_interval = _get_refresh_interval(client, index)
+ _disable_refresh_interval(client, index)
+ _logger.debug("running bulk index of %s documents", len(bulk_chunk_documents))
+ _success, _errors = bulk(
+ client=client,
+ actions=bulk_chunk_documents,
+ ignore_status=ignore_status,
+ chunk_size=chunk_size,
+ max_chunk_bytes=max_chunk_bytes,
+ max_retries=max_retries,
+ initial_backoff=initial_backoff,
+ max_backoff=max_backoff,
+ request_timeout=30,
+ **kwargs,
+ )
+ success += _success
+ errors += _errors # type: ignore
+ _logger.debug("indexed %s documents (%s/%s)", _success, success, total_documents)
+ progress_bar.update(success, force=True)
+ except TransportError as e:
+ if str(e.status_code) == "429": # Too Many Requests
+ _logger.error(
+ "Error 429 (Too Many Requests):"
+ "Try to tune bulk_size parameter."
+ "Read more here: https://aws.amazon.com/premiumsupport/knowledge-center/resolve-429-error-es"
+ )
+ raise e
+
+ finally:
+ _set_refresh_interval(client, index, refresh_interval)
+
+ return {"success": success, "errors": errors}
diff --git a/poetry.lock b/poetry.lock
index e9ff84c4d..d3282f25c 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -394,7 +394,7 @@ python-versions = ">=3.6, <3.7"
name = "decorator"
version = "5.1.0"
description = "Decorators for Humans"
-category = "dev"
+category = "main"
optional = false
python-versions = ">=3.5"
@@ -719,6 +719,19 @@ python-versions = "*"
[package.extras]
dev = ["hypothesis"]
+[[package]]
+name = "jsonpath-ng"
+version = "1.5.3"
+description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming."
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+decorator = "*"
+ply = "*"
+six = "*"
+
[[package]]
name = "jsonschema"
version = "4.0.0"
@@ -1147,6 +1160,23 @@ python-versions = ">=3.6"
[package.dependencies]
et-xmlfile = "*"
+[[package]]
+name = "opensearch-py"
+version = "1.0.0"
+description = "Python low-level client for OpenSearch"
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4"
+
+[package.dependencies]
+certifi = "*"
+urllib3 = ">=1.21.1,<2"
+
+[package.extras]
+async = ["aiohttp (>=3,<4)"]
+develop = ["requests (>=2.0.0,<3.0.0)", "coverage", "mock", "pyyaml", "pytest", "pytest-cov", "black", "jinja2"]
+requests = ["requests (>=2.4.0,<3.0.0)"]
+
[[package]]
name = "packaging"
version = "21.0"
@@ -1283,6 +1313,30 @@ importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""}
dev = ["pre-commit", "tox"]
testing = ["pytest", "pytest-benchmark"]
+[[package]]
+name = "ply"
+version = "3.11"
+description = "Python Lex & Yacc"
+category = "main"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "progressbar2"
+version = "3.53.3"
+description = "A Python Progressbar library to provide visual (yet text based) progress to long running operations."
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+python-utils = ">=2.3.0"
+six = "*"
+
+[package.extras]
+docs = ["sphinx (>=1.7.4)"]
+tests = ["flake8 (>=3.7.7)", "pytest (>=4.6.9)", "pytest-cov (>=2.6.1)", "freezegun (>=0.3.11)", "sphinx (>=1.8.5)"]
+
[[package]]
name = "prometheus-client"
version = "0.11.0"
@@ -1551,6 +1605,17 @@ category = "dev"
optional = false
python-versions = "*"
+[[package]]
+name = "python-utils"
+version = "2.5.6"
+description = "Python Utils is a module with some convenient utilities not included with the standard Python install"
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+six = "*"
+
[[package]]
name = "pytz"
version = "2021.3"
@@ -1634,6 +1699,18 @@ urllib3 = ">=1.21.1,<1.27"
socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
+[[package]]
+name = "requests-aws4auth"
+version = "1.1.1"
+description = "AWS4 authentication for Requests"
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+requests = "*"
+six = "*"
+
[[package]]
name = "requests-unixsocket"
version = "0.2.0"
@@ -2529,6 +2606,11 @@ json5 = [
{file = "json5-0.9.6-py2.py3-none-any.whl", hash = "sha256:823e510eb355949bed817e1f3e2d682455dc6af9daf6066d5698d6a2ca4481c2"},
{file = "json5-0.9.6.tar.gz", hash = "sha256:9175ad1bc248e22bb8d95a8e8d765958bf0008fef2fe8abab5bc04e0f1ac8302"},
]
+jsonpath-ng = [
+ {file = "jsonpath-ng-1.5.3.tar.gz", hash = "sha256:a273b182a82c1256daab86a313b937059261b5c5f8c4fa3fc38b882b344dd567"},
+ {file = "jsonpath_ng-1.5.3-py2-none-any.whl", hash = "sha256:f75b95dbecb8a0f3b86fd2ead21c2b022c3f5770957492b9b6196ecccfeb10aa"},
+ {file = "jsonpath_ng-1.5.3-py3-none-any.whl", hash = "sha256:292a93569d74029ba75ac2dc3d3630fc0e17b2df26119a165fa1d498ca47bf65"},
+]
jsonschema = [
{file = "jsonschema-4.0.0-py3-none-any.whl", hash = "sha256:c773028c649441ab980015b5b622f4cd5134cf563daaf0235ca4b73cc3734f20"},
{file = "jsonschema-4.0.0.tar.gz", hash = "sha256:bc51325b929171791c42ebc1c70b9713eb134d3bb8ebd5474c8b659b15be6d86"},
@@ -2856,6 +2938,10 @@ openpyxl = [
{file = "openpyxl-3.0.9-py2.py3-none-any.whl", hash = "sha256:8f3b11bd896a95468a4ab162fc4fcd260d46157155d1f8bfaabb99d88cfcf79f"},
{file = "openpyxl-3.0.9.tar.gz", hash = "sha256:40f568b9829bf9e446acfffce30250ac1fa39035124d55fc024025c41481c90f"},
]
+opensearch-py = [
+ {file = "opensearch-py-1.0.0.tar.gz", hash = "sha256:fa952836cabfa1b2fb05f852edc1a373342494345e89fd52b7124daf4d296bb4"},
+ {file = "opensearch_py-1.0.0-py2.py3-none-any.whl", hash = "sha256:17afebc25dc890b96c4e9ec8692dcfdb6842c028ce8c2d252e8f55c587960177"},
+]
packaging = [
{file = "packaging-21.0-py3-none-any.whl", hash = "sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14"},
{file = "packaging-21.0.tar.gz", hash = "sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7"},
@@ -2943,6 +3029,14 @@ pluggy = [
{file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
{file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
]
+ply = [
+ {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"},
+ {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"},
+]
+progressbar2 = [
+ {file = "progressbar2-3.53.3-py2.py3-none-any.whl", hash = "sha256:6610fe393a4591967ecf9062d42c0663c8862092245c490e5971ec5f348755ca"},
+ {file = "progressbar2-3.53.3.tar.gz", hash = "sha256:f4e1c2d48e608850c59f793d6e74ccdebbcbaac7ffe917d45e9646ec0d664d6d"},
+]
prometheus-client = [
{file = "prometheus_client-0.11.0-py2.py3-none-any.whl", hash = "sha256:b014bc76815eb1399da8ce5fc84b7717a3e63652b0c0f8804092c9363acab1b2"},
{file = "prometheus_client-0.11.0.tar.gz", hash = "sha256:3a8baade6cb80bcfe43297e33e7623f3118d660d41387593758e2fb1ea173a86"},
@@ -3094,6 +3188,10 @@ python-dateutil = [
python-levenshtein = [
{file = "python-Levenshtein-0.12.2.tar.gz", hash = "sha256:dc2395fbd148a1ab31090dd113c366695934b9e85fe5a4b2a032745efd0346f6"},
]
+python-utils = [
+ {file = "python-utils-2.5.6.tar.gz", hash = "sha256:352d5b1febeebf9b3cdb9f3c87a3b26ef22d3c9e274a8ec1e7048ecd2fac4349"},
+ {file = "python_utils-2.5.6-py2.py3-none-any.whl", hash = "sha256:18fbc1a1df9a9061e3059a48ebe5c8a66b654d688b0e3ecca8b339a7f168f208"},
+]
pytz = [
{file = "pytz-2021.3-py2.py3-none-any.whl", hash = "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c"},
{file = "pytz-2021.3.tar.gz", hash = "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"},
@@ -3206,6 +3304,10 @@ requests = [
{file = "requests-2.26.0-py2.py3-none-any.whl", hash = "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24"},
{file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"},
]
+requests-aws4auth = [
+ {file = "requests-aws4auth-1.1.1.tar.gz", hash = "sha256:c0883346ce30b5018903a67da88df72f73ff06e1a320845bba9cd85e811ba0ba"},
+ {file = "requests_aws4auth-1.1.1-py2.py3-none-any.whl", hash = "sha256:dfd9f930ffde48a756b72b55698a8522875ea6358dcffbcc44a66700ace31783"},
+]
requests-unixsocket = [
{file = "requests-unixsocket-0.2.0.tar.gz", hash = "sha256:9e5c1a20afc3cf786197ae59c79bcdb0e7565f218f27df5f891307ee8817c1ea"},
{file = "requests_unixsocket-0.2.0-py2.py3-none-any.whl", hash = "sha256:014d07bfb66dc805a011a8b4b306cf4ec96d2eddb589f6b2b5765e626f0dc0cc"},
diff --git a/pyproject.toml b/pyproject.toml
index 36a4df379..23a635344 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,6 +48,10 @@ pyodbc = { version = "~4.0.32", optional = true }
sphinx-bootstrap-theme = "^0.8.0"
Sphinx = "^4.2.0"
tox = "^3.24.4"
+requests-aws4auth = "^1.1.1"
+jsonpath-ng = "^1.5.3"
+progressbar2 = "^3.53.3"
+opensearch-py = "^1.0.0"
[tool.poetry.extras]
diff --git a/test_infra/app.py b/test_infra/app.py
index 4e27aa261..8c3395e22 100644
--- a/test_infra/app.py
+++ b/test_infra/app.py
@@ -2,6 +2,7 @@
from aws_cdk import core as cdk
from stacks.base_stack import BaseStack
from stacks.databases_stack import DatabasesStack
+from stacks.opensearch_stack import OpenSearchStack
app = cdk.App()
@@ -14,4 +15,12 @@
base.get_key,
)
+OpenSearchStack(
+ app,
+ "aws-data-wrangler-opensearch",
+ base.get_vpc,
+ base.get_bucket,
+ base.get_key,
+)
+
app.synth()
diff --git a/test_infra/poetry.lock b/test_infra/poetry.lock
index f68d38031..aa17ff35f 100644
--- a/test_infra/poetry.lock
+++ b/test_infra/poetry.lock
@@ -1,496 +1,638 @@
[[package]]
name = "attrs"
-version = "20.3.0"
+version = "21.2.0"
description = "Classes Without Boilerplate"
category = "main"
optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[package.extras]
-dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface", "furo", "sphinx", "pre-commit"]
-docs = ["furo", "sphinx", "zope.interface"]
-tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface"]
-tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six"]
+dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit"]
+docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"]
+tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface"]
+tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins"]
[[package]]
name = "aws-cdk.assets"
-version = "1.115.0"
+version = "1.124.0"
description = "This module is deprecated. All types are now available under the core module"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.core" = "1.115.0"
-"aws-cdk.cx-api" = "1.115.0"
+"aws-cdk.core" = "1.124.0"
+"aws-cdk.cx-api" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-applicationautoscaling"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for AWS::ApplicationAutoScaling"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-autoscaling-common" = "1.115.0"
-"aws-cdk.aws-cloudwatch" = "1.115.0"
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
+"aws-cdk.aws-autoscaling-common" = "1.124.0"
+"aws-cdk.aws-cloudwatch" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-autoscaling-common"
-version = "1.115.0"
+version = "1.124.0"
description = "Common implementation package for @aws-cdk/aws-autoscaling and @aws-cdk/aws-applicationautoscaling"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
+publication = ">=0.0.3"
+
+[[package]]
+name = "aws-cdk.aws-certificatemanager"
+version = "1.124.0"
+description = "The CDK Construct Library for AWS::CertificateManager"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+"aws-cdk.aws-cloudwatch" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-lambda" = "1.124.0"
+"aws-cdk.aws-route53" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
+constructs = ">=3.3.69,<4.0.0"
+jsii = ">=1.34.0,<2.0.0"
+publication = ">=0.0.3"
+
+[[package]]
+name = "aws-cdk.aws-cloudformation"
+version = "1.124.0"
+description = "The CDK Construct Library for AWS::CloudFormation"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-lambda" = "1.124.0"
+"aws-cdk.aws-s3" = "1.124.0"
+"aws-cdk.aws-sns" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
+"aws-cdk.cx-api" = "1.124.0"
+constructs = ">=3.3.69,<4.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-cloudwatch"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for AWS::CloudWatch"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-codeguruprofiler"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for AWS::CodeGuruProfiler"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
+publication = ">=0.0.3"
+
+[[package]]
+name = "aws-cdk.aws-codestarnotifications"
+version = "1.124.0"
+description = "The CDK Construct Library for AWS::CodeStarNotifications"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+"aws-cdk.core" = "1.124.0"
+constructs = ">=3.3.69,<4.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-ec2"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for AWS::EC2"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-cloudwatch" = "1.115.0"
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.aws-kms" = "1.115.0"
-"aws-cdk.aws-logs" = "1.115.0"
-"aws-cdk.aws-s3" = "1.115.0"
-"aws-cdk.aws-s3-assets" = "1.115.0"
-"aws-cdk.aws-ssm" = "1.115.0"
-"aws-cdk.cloud-assembly-schema" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
-"aws-cdk.cx-api" = "1.115.0"
-"aws-cdk.region-info" = "1.115.0"
+"aws-cdk.aws-cloudwatch" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-kms" = "1.124.0"
+"aws-cdk.aws-logs" = "1.124.0"
+"aws-cdk.aws-s3" = "1.124.0"
+"aws-cdk.aws-s3-assets" = "1.124.0"
+"aws-cdk.aws-ssm" = "1.124.0"
+"aws-cdk.cloud-assembly-schema" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
+"aws-cdk.cx-api" = "1.124.0"
+"aws-cdk.region-info" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-ecr"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for AWS::ECR"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-events" = "1.115.0"
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
+"aws-cdk.aws-events" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-ecr-assets"
-version = "1.115.0"
+version = "1.124.0"
description = "Docker image assets deployed to ECR"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.assets" = "1.115.0"
-"aws-cdk.aws-ecr" = "1.115.0"
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.aws-s3" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
-"aws-cdk.cx-api" = "1.115.0"
+"aws-cdk.assets" = "1.124.0"
+"aws-cdk.aws-ecr" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-s3" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
+"aws-cdk.cx-api" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-efs"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for AWS::EFS"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-ec2" = "1.115.0"
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.aws-kms" = "1.115.0"
-"aws-cdk.cloud-assembly-schema" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
-"aws-cdk.cx-api" = "1.115.0"
+"aws-cdk.aws-ec2" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-kms" = "1.124.0"
+"aws-cdk.cloud-assembly-schema" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
+"aws-cdk.cx-api" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-events"
-version = "1.115.0"
+version = "1.124.0"
description = "Amazon EventBridge Construct Library"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-glue"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for AWS::Glue"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-ec2" = "1.115.0"
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.aws-kms" = "1.115.0"
-"aws-cdk.aws-s3" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
+"aws-cdk.assets" = "1.124.0"
+"aws-cdk.aws-cloudwatch" = "1.124.0"
+"aws-cdk.aws-ec2" = "1.124.0"
+"aws-cdk.aws-events" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-kms" = "1.124.0"
+"aws-cdk.aws-logs" = "1.124.0"
+"aws-cdk.aws-s3" = "1.124.0"
+"aws-cdk.aws-s3-assets" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-iam"
-version = "1.115.0"
+version = "1.124.0"
description = "CDK routines for easily assigning correct and minimal IAM permissions"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.core" = "1.115.0"
-"aws-cdk.region-info" = "1.115.0"
+"aws-cdk.core" = "1.124.0"
+"aws-cdk.region-info" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-kms"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for AWS::KMS"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
-"aws-cdk.cx-api" = "1.115.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.cloud-assembly-schema" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
+"aws-cdk.cx-api" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-lambda"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for AWS::Lambda"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-applicationautoscaling" = "1.115.0"
-"aws-cdk.aws-cloudwatch" = "1.115.0"
-"aws-cdk.aws-codeguruprofiler" = "1.115.0"
-"aws-cdk.aws-ec2" = "1.115.0"
-"aws-cdk.aws-ecr" = "1.115.0"
-"aws-cdk.aws-ecr-assets" = "1.115.0"
-"aws-cdk.aws-efs" = "1.115.0"
-"aws-cdk.aws-events" = "1.115.0"
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.aws-kms" = "1.115.0"
-"aws-cdk.aws-logs" = "1.115.0"
-"aws-cdk.aws-s3" = "1.115.0"
-"aws-cdk.aws-s3-assets" = "1.115.0"
-"aws-cdk.aws-signer" = "1.115.0"
-"aws-cdk.aws-sqs" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
-"aws-cdk.cx-api" = "1.115.0"
+"aws-cdk.aws-applicationautoscaling" = "1.124.0"
+"aws-cdk.aws-cloudwatch" = "1.124.0"
+"aws-cdk.aws-codeguruprofiler" = "1.124.0"
+"aws-cdk.aws-ec2" = "1.124.0"
+"aws-cdk.aws-ecr" = "1.124.0"
+"aws-cdk.aws-ecr-assets" = "1.124.0"
+"aws-cdk.aws-efs" = "1.124.0"
+"aws-cdk.aws-events" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-kms" = "1.124.0"
+"aws-cdk.aws-logs" = "1.124.0"
+"aws-cdk.aws-s3" = "1.124.0"
+"aws-cdk.aws-s3-assets" = "1.124.0"
+"aws-cdk.aws-signer" = "1.124.0"
+"aws-cdk.aws-sqs" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
+"aws-cdk.cx-api" = "1.124.0"
+"aws-cdk.region-info" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-logs"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for AWS::Logs"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-cloudwatch" = "1.115.0"
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.aws-kms" = "1.115.0"
-"aws-cdk.aws-s3-assets" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
+"aws-cdk.aws-cloudwatch" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-kms" = "1.124.0"
+"aws-cdk.aws-s3-assets" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
+constructs = ">=3.3.69,<4.0.0"
+jsii = ">=1.34.0,<2.0.0"
+publication = ">=0.0.3"
+
+[[package]]
+name = "aws-cdk.aws-opensearchservice"
+version = "1.124.0"
+description = "The CDK Construct Library for AWS::OpenSearchService"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+"aws-cdk.aws-certificatemanager" = "1.124.0"
+"aws-cdk.aws-cloudwatch" = "1.124.0"
+"aws-cdk.aws-ec2" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-kms" = "1.124.0"
+"aws-cdk.aws-logs" = "1.124.0"
+"aws-cdk.aws-route53" = "1.124.0"
+"aws-cdk.aws-secretsmanager" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
+"aws-cdk.custom-resources" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-rds"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for AWS::RDS"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-cloudwatch" = "1.115.0"
-"aws-cdk.aws-ec2" = "1.115.0"
-"aws-cdk.aws-events" = "1.115.0"
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.aws-kms" = "1.115.0"
-"aws-cdk.aws-logs" = "1.115.0"
-"aws-cdk.aws-s3" = "1.115.0"
-"aws-cdk.aws-secretsmanager" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
-"aws-cdk.cx-api" = "1.115.0"
+"aws-cdk.aws-cloudwatch" = "1.124.0"
+"aws-cdk.aws-ec2" = "1.124.0"
+"aws-cdk.aws-events" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-kms" = "1.124.0"
+"aws-cdk.aws-logs" = "1.124.0"
+"aws-cdk.aws-s3" = "1.124.0"
+"aws-cdk.aws-secretsmanager" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
+"aws-cdk.cx-api" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-redshift"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for AWS::Redshift"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-ec2" = "1.115.0"
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.aws-kms" = "1.115.0"
-"aws-cdk.aws-s3" = "1.115.0"
-"aws-cdk.aws-secretsmanager" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
+"aws-cdk.aws-ec2" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-kms" = "1.124.0"
+"aws-cdk.aws-lambda" = "1.124.0"
+"aws-cdk.aws-s3" = "1.124.0"
+"aws-cdk.aws-secretsmanager" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
+"aws-cdk.custom-resources" = "1.124.0"
+constructs = ">=3.3.69,<4.0.0"
+jsii = ">=1.34.0,<2.0.0"
+publication = ">=0.0.3"
+
+[[package]]
+name = "aws-cdk.aws-route53"
+version = "1.124.0"
+description = "The CDK Construct Library for AWS::Route53"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+"aws-cdk.aws-ec2" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-logs" = "1.124.0"
+"aws-cdk.cloud-assembly-schema" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
+"aws-cdk.custom-resources" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-s3"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for AWS::S3"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-events" = "1.115.0"
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.aws-kms" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
-"aws-cdk.cx-api" = "1.115.0"
+"aws-cdk.aws-events" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-kms" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
+"aws-cdk.cx-api" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-s3-assets"
-version = "1.115.0"
+version = "1.124.0"
description = "Deploy local files and directories to S3"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.assets" = "1.115.0"
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.aws-kms" = "1.115.0"
-"aws-cdk.aws-s3" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
-"aws-cdk.cx-api" = "1.115.0"
+"aws-cdk.assets" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-kms" = "1.124.0"
+"aws-cdk.aws-s3" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
+"aws-cdk.cx-api" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-sam"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for the AWS Serverless Application Model (SAM) resources"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.core" = "1.115.0"
+"aws-cdk.core" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-secretsmanager"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for AWS::SecretsManager"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-ec2" = "1.115.0"
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.aws-kms" = "1.115.0"
-"aws-cdk.aws-lambda" = "1.115.0"
-"aws-cdk.aws-sam" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
-"aws-cdk.cx-api" = "1.115.0"
+"aws-cdk.aws-ec2" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-kms" = "1.124.0"
+"aws-cdk.aws-lambda" = "1.124.0"
+"aws-cdk.aws-sam" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
+"aws-cdk.cx-api" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-signer"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for AWS::Signer"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.core" = "1.115.0"
+"aws-cdk.core" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
+publication = ">=0.0.3"
+
+[[package]]
+name = "aws-cdk.aws-sns"
+version = "1.124.0"
+description = "The CDK Construct Library for AWS::SNS"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+"aws-cdk.aws-cloudwatch" = "1.124.0"
+"aws-cdk.aws-codestarnotifications" = "1.124.0"
+"aws-cdk.aws-events" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-kms" = "1.124.0"
+"aws-cdk.aws-sqs" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
+constructs = ">=3.3.69,<4.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-sqs"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for AWS::SQS"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-cloudwatch" = "1.115.0"
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.aws-kms" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
+"aws-cdk.aws-cloudwatch" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-kms" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.aws-ssm"
-version = "1.115.0"
+version = "1.124.0"
description = "The CDK Construct Library for AWS::SSM"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.aws-iam" = "1.115.0"
-"aws-cdk.aws-kms" = "1.115.0"
-"aws-cdk.cloud-assembly-schema" = "1.115.0"
-"aws-cdk.core" = "1.115.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-kms" = "1.124.0"
+"aws-cdk.cloud-assembly-schema" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.cloud-assembly-schema"
-version = "1.115.0"
+version = "1.124.0"
description = "Cloud Assembly Schema"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.core"
-version = "1.115.0"
+version = "1.124.0"
description = "AWS Cloud Development Kit Core Library"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.cloud-assembly-schema" = "1.115.0"
-"aws-cdk.cx-api" = "1.115.0"
-"aws-cdk.region-info" = "1.115.0"
+"aws-cdk.cloud-assembly-schema" = "1.124.0"
+"aws-cdk.cx-api" = "1.124.0"
+"aws-cdk.region-info" = "1.124.0"
constructs = ">=3.3.69,<4.0.0"
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
+publication = ">=0.0.3"
+
+[[package]]
+name = "aws-cdk.custom-resources"
+version = "1.124.0"
+description = "Constructs for implementing CDK custom resources"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+"aws-cdk.aws-cloudformation" = "1.124.0"
+"aws-cdk.aws-ec2" = "1.124.0"
+"aws-cdk.aws-iam" = "1.124.0"
+"aws-cdk.aws-lambda" = "1.124.0"
+"aws-cdk.aws-logs" = "1.124.0"
+"aws-cdk.aws-sns" = "1.124.0"
+"aws-cdk.core" = "1.124.0"
+constructs = ">=3.3.69,<4.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.cx-api"
-version = "1.115.0"
+version = "1.124.0"
description = "Cloud executable protocol"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-"aws-cdk.cloud-assembly-schema" = "1.115.0"
-jsii = ">=1.31.0,<2.0.0"
+"aws-cdk.cloud-assembly-schema" = "1.124.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
name = "aws-cdk.region-info"
-version = "1.115.0"
+version = "1.124.0"
description = "AWS region information, such as service principal names"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
-jsii = ">=1.31.0,<2.0.0"
+jsii = ">=1.34.0,<2.0.0"
publication = ">=0.0.3"
[[package]]
@@ -509,14 +651,14 @@ dev = ["bumpversion", "wheel", "watchdog", "flake8", "tox", "coverage", "sphinx"
[[package]]
name = "cattrs"
-version = "1.6.0"
+version = "1.8.0"
description = "Composable complex class support for attrs and dataclasses."
category = "main"
optional = false
python-versions = ">=3.7,<4.0"
[package.dependencies]
-attrs = "*"
+attrs = ">=20"
[[package]]
name = "constructs"
@@ -547,17 +689,17 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes
[[package]]
name = "jsii"
-version = "1.32.0"
+version = "1.34.0"
description = "Python client for jsii runtime"
category = "main"
optional = false
python-versions = "~=3.6"
[package.dependencies]
-attrs = ">=20.1,<21.0"
+attrs = ">=21.2,<22.0"
cattrs = [
{version = ">=1.0.0,<1.1.0", markers = "python_version < \"3.7\""},
- {version = ">=1.6.0,<1.7.0", markers = "python_version >= \"3.7\""},
+ {version = ">=1.8.0,<1.9.0", markers = "python_version >= \"3.7\""},
]
importlib-resources = {version = "*", markers = "python_version < \"3.7\""}
python-dateutil = "*"
@@ -613,130 +755,158 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes
[metadata]
lock-version = "1.1"
python-versions = ">=3.6.2, <3.10"
-content-hash = "6f8430d31b5e3d08bb0393b4c93ca223cc9d49b55bb3045f95326770d74347ca"
+content-hash = "6d95fccb052c85375178aa3ade72de9e4ee87c009d7e067dd7d4120c23ded9f5"
[metadata.files]
attrs = [
- {file = "attrs-20.3.0-py2.py3-none-any.whl", hash = "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6"},
- {file = "attrs-20.3.0.tar.gz", hash = "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700"},
+ {file = "attrs-21.2.0-py2.py3-none-any.whl", hash = "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1"},
+ {file = "attrs-21.2.0.tar.gz", hash = "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb"},
]
"aws-cdk.assets" = [
- {file = "aws-cdk.assets-1.115.0.tar.gz", hash = "sha256:e3a569f900451f2f8429a2ad7cd059712f2903d24cbcaa023911f46362496d2d"},
- {file = "aws_cdk.assets-1.115.0-py3-none-any.whl", hash = "sha256:d7f62fdaf500980cbcb0cab82cd08cb7334683428cfb3c67c68f72371e29109f"},
+ {file = "aws-cdk.assets-1.124.0.tar.gz", hash = "sha256:8097177806b29824a69bbdb5df9ec74f7b360708b51ed860613d38e30414054a"},
+ {file = "aws_cdk.assets-1.124.0-py3-none-any.whl", hash = "sha256:c94b63e36c094111c6a9abb2a9d6c694f3e123034cf5dc23e5293fdc32c44fb3"},
]
"aws-cdk.aws-applicationautoscaling" = [
- {file = "aws-cdk.aws-applicationautoscaling-1.115.0.tar.gz", hash = "sha256:e174b3247252bfec419389b896267516d2f874ec56456880116f79204ae9e3e5"},
- {file = "aws_cdk.aws_applicationautoscaling-1.115.0-py3-none-any.whl", hash = "sha256:45eff7fb107924b6ade243e88edae49f14a599ff3afcaf40a73969c45de733b5"},
+ {file = "aws-cdk.aws-applicationautoscaling-1.124.0.tar.gz", hash = "sha256:c3bc89c2754b7ce029c667be9ab1633884bf574d33773a1dc07a3cff1b698670"},
+ {file = "aws_cdk.aws_applicationautoscaling-1.124.0-py3-none-any.whl", hash = "sha256:d0dcc91b3de13ad46b874813877af3746adec3ad9f7380b2408a14cdd848b65c"},
]
"aws-cdk.aws-autoscaling-common" = [
- {file = "aws-cdk.aws-autoscaling-common-1.115.0.tar.gz", hash = "sha256:b87c84d3e558b20e3bea515d89cb59d633d71e2c8a6e4e859a691f3c06d45c10"},
- {file = "aws_cdk.aws_autoscaling_common-1.115.0-py3-none-any.whl", hash = "sha256:bc0e56fe4fedd6e5a0d094845c4e1b2681bf60dfb72f2062392ef7edd5b157bd"},
+ {file = "aws-cdk.aws-autoscaling-common-1.124.0.tar.gz", hash = "sha256:03f57fcd34d9e370c0929de63c674bdbf2a8fbe2efed40942e0e2bff1ed1d436"},
+ {file = "aws_cdk.aws_autoscaling_common-1.124.0-py3-none-any.whl", hash = "sha256:1969320c12bf4107346233b3310464c1e752b65a6577c865abb809711cec2c1f"},
+]
+"aws-cdk.aws-certificatemanager" = [
+ {file = "aws-cdk.aws-certificatemanager-1.124.0.tar.gz", hash = "sha256:291e7c29aa406619276dc141a3827b0af15c9a997b6e7dc1a8c59bbfb3aa7df7"},
+ {file = "aws_cdk.aws_certificatemanager-1.124.0-py3-none-any.whl", hash = "sha256:23071000fe931dd817638b059991872fe93a91a1c1d33750f080c536e9aaf302"},
+]
+"aws-cdk.aws-cloudformation" = [
+ {file = "aws-cdk.aws-cloudformation-1.124.0.tar.gz", hash = "sha256:c38efe614113c3bdcb964f6c20742994154392bc78e82c34a299d0f1b26a7c65"},
+ {file = "aws_cdk.aws_cloudformation-1.124.0-py3-none-any.whl", hash = "sha256:9b530359f567555b83dfbb99f7112fdb2ad893176032ff542ce09f7454ce5107"},
]
"aws-cdk.aws-cloudwatch" = [
- {file = "aws-cdk.aws-cloudwatch-1.115.0.tar.gz", hash = "sha256:adb27916047303bf5748d503dc608041d30ea002b47c4e2c370d2084c1bec8c4"},
- {file = "aws_cdk.aws_cloudwatch-1.115.0-py3-none-any.whl", hash = "sha256:2b6b5e954f0b2a629d977cb6db93ec38e2c3c6dde43d88369dbc7a64c92d1ce1"},
+ {file = "aws-cdk.aws-cloudwatch-1.124.0.tar.gz", hash = "sha256:221734f8b6f940068714fe00fd68a8a32d767c713b2adb874365482836248f7f"},
+ {file = "aws_cdk.aws_cloudwatch-1.124.0-py3-none-any.whl", hash = "sha256:a9a4abf58e31cb53872601296b41cf8e8d5106807a5775d19a6ac05fbe34bef0"},
]
"aws-cdk.aws-codeguruprofiler" = [
- {file = "aws-cdk.aws-codeguruprofiler-1.115.0.tar.gz", hash = "sha256:bd8954511616b1ae8e6bd88122de5cb94c7d16b79f051452b490af9ec729124d"},
- {file = "aws_cdk.aws_codeguruprofiler-1.115.0-py3-none-any.whl", hash = "sha256:48d6a7ea1a372e3e1dbdb0307c7665ba486ef58b80d1d2ebb56cabb03b40af80"},
+ {file = "aws-cdk.aws-codeguruprofiler-1.124.0.tar.gz", hash = "sha256:e37cd801e5b7fa93a0dba84effc36cd94f090b83988c4f165815ba585f7ca866"},
+ {file = "aws_cdk.aws_codeguruprofiler-1.124.0-py3-none-any.whl", hash = "sha256:4d4bd49ea2415d9daf7c3c57403060802e5f523bd476a276f1e00a3e3d73c15d"},
+]
+"aws-cdk.aws-codestarnotifications" = [
+ {file = "aws-cdk.aws-codestarnotifications-1.124.0.tar.gz", hash = "sha256:478486be7e24e455c1fd8a54489de491005997b6ebdc06212a6231e89471414a"},
+ {file = "aws_cdk.aws_codestarnotifications-1.124.0-py3-none-any.whl", hash = "sha256:de73fbcceba282ddf3caf5e74b188e4685108cec845f573986ea3fec1c98beba"},
]
"aws-cdk.aws-ec2" = [
- {file = "aws-cdk.aws-ec2-1.115.0.tar.gz", hash = "sha256:e819f98e07d3ee24182f23d435bf164ca7bdfdd42e72305d975b2c75a5a57138"},
- {file = "aws_cdk.aws_ec2-1.115.0-py3-none-any.whl", hash = "sha256:0475af1a07e514136004870c590dd5b187dd4588eb291da4662ed2d7cf5956c7"},
+ {file = "aws-cdk.aws-ec2-1.124.0.tar.gz", hash = "sha256:f7515734cac0ef8eeaa003bef85364c878fad4a90876de313d156cc863199811"},
+ {file = "aws_cdk.aws_ec2-1.124.0-py3-none-any.whl", hash = "sha256:d000d22d87d887dfbc61b82be897234fc58f421b2fbbbc29f002b683b4fdac4f"},
]
"aws-cdk.aws-ecr" = [
- {file = "aws-cdk.aws-ecr-1.115.0.tar.gz", hash = "sha256:3083470a95283a95275e1f2ad30868f3591d0a5bf432cf4bab360dabe4cb2e29"},
- {file = "aws_cdk.aws_ecr-1.115.0-py3-none-any.whl", hash = "sha256:695842b3b892b404c3219d8b44b9ad7a8bf1fd1957abb97c618dba47e050108b"},
+ {file = "aws-cdk.aws-ecr-1.124.0.tar.gz", hash = "sha256:cbf940fbb76eb189143df45f67115673faf10a4b8e7f571660822604c9016aad"},
+ {file = "aws_cdk.aws_ecr-1.124.0-py3-none-any.whl", hash = "sha256:1661c6f8fd618ac75da7cdefd36adda747218e4fe27faa44b5df62ecabd0b3f3"},
]
"aws-cdk.aws-ecr-assets" = [
- {file = "aws-cdk.aws-ecr-assets-1.115.0.tar.gz", hash = "sha256:5450bbcebb89eff84327246c6049a90adefe73ed194bd62778ffeee6facf9042"},
- {file = "aws_cdk.aws_ecr_assets-1.115.0-py3-none-any.whl", hash = "sha256:8e7e5b2351370b795b12abd0812a3ace241cc46df8d67aecb92410de2bfd7318"},
+ {file = "aws-cdk.aws-ecr-assets-1.124.0.tar.gz", hash = "sha256:b2401b111474413436e664c1652d02d6e053ca946cbbe224a4f9c3c6220005df"},
+ {file = "aws_cdk.aws_ecr_assets-1.124.0-py3-none-any.whl", hash = "sha256:7dc6b6f262baffa37df3ed898d8ae74ef2384793be822a91b91159cb512183ff"},
]
"aws-cdk.aws-efs" = [
- {file = "aws-cdk.aws-efs-1.115.0.tar.gz", hash = "sha256:eb96d01635283dbee1101fe57e0a19310974c8de02f75d9042adbab44139fe65"},
- {file = "aws_cdk.aws_efs-1.115.0-py3-none-any.whl", hash = "sha256:8e9e3f0f837e1ff3cfe96da5d700095f24d132c11cc7544f7a9f20024fa27372"},
+ {file = "aws-cdk.aws-efs-1.124.0.tar.gz", hash = "sha256:90aaccea5ff55ae4a3045540f78e007c048709e142d77947aa15ad655ed4c011"},
+ {file = "aws_cdk.aws_efs-1.124.0-py3-none-any.whl", hash = "sha256:282db0bd269535fb19f0101d4fa6b9cb7cf7dcddf2eaf5d04d7f03fef156c9d0"},
]
"aws-cdk.aws-events" = [
- {file = "aws-cdk.aws-events-1.115.0.tar.gz", hash = "sha256:4ce7f0e894c61849e8157a0170cb74ec5223d18dc613075912f2ef560974856b"},
- {file = "aws_cdk.aws_events-1.115.0-py3-none-any.whl", hash = "sha256:a817f0f46c027163a30eb5bab254540e00f5e5285bb1e8678dfd724f8f1187c0"},
+ {file = "aws-cdk.aws-events-1.124.0.tar.gz", hash = "sha256:0b6b5ffca233c0b5d7abaf011072ca896463ce391242ffdf7bf4def28dec8213"},
+ {file = "aws_cdk.aws_events-1.124.0-py3-none-any.whl", hash = "sha256:92ba680941365de0f90ad7881b8c2e787c50b85a69bc32e82b4578a3276f810f"},
]
"aws-cdk.aws-glue" = [
- {file = "aws-cdk.aws-glue-1.115.0.tar.gz", hash = "sha256:a85d344e61cfb3e0953665bcd85fd4b7ac282417fe7099e2c54cc393f62bfa99"},
- {file = "aws_cdk.aws_glue-1.115.0-py3-none-any.whl", hash = "sha256:ca2780bf366ab2ba74adb98b6a49c95ee6e5dbde2bc5758657cb5d4197c996ce"},
+ {file = "aws-cdk.aws-glue-1.124.0.tar.gz", hash = "sha256:b43f747a2b8480ca848f7ab27b1dd0c7e352c9602fdb039cfc78f5013dbef450"},
+ {file = "aws_cdk.aws_glue-1.124.0-py3-none-any.whl", hash = "sha256:d90bc85ae0d6b03536879d6fa72cdc49cfe1d58451b9e0065786b682dc2f9422"},
]
"aws-cdk.aws-iam" = [
- {file = "aws-cdk.aws-iam-1.115.0.tar.gz", hash = "sha256:fe4e3138d6544755cbeb2400fd770b583b01906443648a4588085de2e781707f"},
- {file = "aws_cdk.aws_iam-1.115.0-py3-none-any.whl", hash = "sha256:7ba923894c6ecce33147527dccbf90fdaecc7a5561b2ca9398623f1f063f898c"},
+ {file = "aws-cdk.aws-iam-1.124.0.tar.gz", hash = "sha256:9d779439048832c6f4d5722196a9490d80bb649e56bb4dadc554ea3ae940f797"},
+ {file = "aws_cdk.aws_iam-1.124.0-py3-none-any.whl", hash = "sha256:249fc537532f73c3cd3f59dc635be58535d9e9f9418062214eb664e14b59a6be"},
]
"aws-cdk.aws-kms" = [
- {file = "aws-cdk.aws-kms-1.115.0.tar.gz", hash = "sha256:1d1feca56bc4c2de722f59a07ee8dc36b6d7a31d70ffe32de5f76c099b2b6322"},
- {file = "aws_cdk.aws_kms-1.115.0-py3-none-any.whl", hash = "sha256:c692b0cebe2b0106ddc0ec3946a895941176b35411d46b27ae9bfb06cdaa9d6d"},
+ {file = "aws-cdk.aws-kms-1.124.0.tar.gz", hash = "sha256:205e79bc8f8e009bd1b5df236f0336e977eb141c70575a42080e36829358215f"},
+ {file = "aws_cdk.aws_kms-1.124.0-py3-none-any.whl", hash = "sha256:91294f10f02000743eef712da5ba7ea2749b43e4a0ad7d4715c9c95b6a472c10"},
]
"aws-cdk.aws-lambda" = [
- {file = "aws-cdk.aws-lambda-1.115.0.tar.gz", hash = "sha256:11eec3652671f37d261f991eaf963726fed281c5aafe77e9f83afab899398892"},
- {file = "aws_cdk.aws_lambda-1.115.0-py3-none-any.whl", hash = "sha256:65000012469a64096d25614c23e22da74a3d15234925cf44b29fd3d63d21b993"},
+ {file = "aws-cdk.aws-lambda-1.124.0.tar.gz", hash = "sha256:801552637c408a693a7b13967da4ec4e8a623f22b90fb0fdfb845c23765e4e29"},
+ {file = "aws_cdk.aws_lambda-1.124.0-py3-none-any.whl", hash = "sha256:50d774d026a8a0ca5089df5c8b2c7cc2ef74db2a4b20c5d049210b154d3af03d"},
]
"aws-cdk.aws-logs" = [
- {file = "aws-cdk.aws-logs-1.115.0.tar.gz", hash = "sha256:de30016914a17ca59d55f36029aa10fdc800f8fa69f4a5de822898aebbb29a78"},
- {file = "aws_cdk.aws_logs-1.115.0-py3-none-any.whl", hash = "sha256:8c6adcf54e066a71a6a7031a8592f52f09a01ca0d6a6d1f51080f9996ad7ac52"},
+ {file = "aws-cdk.aws-logs-1.124.0.tar.gz", hash = "sha256:2fba565fc4f12b397bd9df1cd9964c1b35ce1ca65cd618407b6b1777bc43d292"},
+ {file = "aws_cdk.aws_logs-1.124.0-py3-none-any.whl", hash = "sha256:1f4b1ff436f2d0663e6c76264d7d6ee9dd0d90f3d9c09e5e93f1b0f31abbc379"},
+]
+"aws-cdk.aws-opensearchservice" = [
+ {file = "aws-cdk.aws-opensearchservice-1.124.0.tar.gz", hash = "sha256:d1bd4ca9ac9cf38b7c04a5e1e63eefe30e6e5e40adc0134e61d468694c71c4b1"},
+ {file = "aws_cdk.aws_opensearchservice-1.124.0-py3-none-any.whl", hash = "sha256:170417a55884ac8f26b0ae4cc59c085c8c2a0607b18ca906c1ee4d366b737d85"},
]
"aws-cdk.aws-rds" = [
- {file = "aws-cdk.aws-rds-1.115.0.tar.gz", hash = "sha256:c562843534494ef283474ebd7bba4e44e0b7cb063c0121e20f08ba49749a2a60"},
- {file = "aws_cdk.aws_rds-1.115.0-py3-none-any.whl", hash = "sha256:7c00e329b6455b4279ad9880c2e033509b27be63b31626413f28558ae8d24a7f"},
+ {file = "aws-cdk.aws-rds-1.124.0.tar.gz", hash = "sha256:20057fc95cda55fc504987dc0395062836dacc72efce2c86051677a1bb6d8d43"},
+ {file = "aws_cdk.aws_rds-1.124.0-py3-none-any.whl", hash = "sha256:bd66c0f76548cee6fb1f100f0e36ab9d5933ef70121b072ae05b3dd26e408ff3"},
]
"aws-cdk.aws-redshift" = [
- {file = "aws-cdk.aws-redshift-1.115.0.tar.gz", hash = "sha256:758e6e940e7a432d46d144ebf8002af51fbe98d452221725510f01488847f9a3"},
- {file = "aws_cdk.aws_redshift-1.115.0-py3-none-any.whl", hash = "sha256:311dcb36814434214917ad707689a210016ce1d6286c69d44ec01f5df27a3c7d"},
+ {file = "aws-cdk.aws-redshift-1.124.0.tar.gz", hash = "sha256:70cb4700cdfecad592524cd017a4a859b3d4ae407b3d2fcf329022c1d2faf863"},
+ {file = "aws_cdk.aws_redshift-1.124.0-py3-none-any.whl", hash = "sha256:4df5c19f74194fb9bd7a56e5b89b9312c35b681a322b0c1b0e248874f628ddc4"},
+]
+"aws-cdk.aws-route53" = [
+ {file = "aws-cdk.aws-route53-1.124.0.tar.gz", hash = "sha256:c5137b3c5211632b931d7b79234aec6006f72701c68477086e70c213320639ef"},
+ {file = "aws_cdk.aws_route53-1.124.0-py3-none-any.whl", hash = "sha256:97fe84e53c26c1a713a3b57341c2ecf488db56cc0b6127975656c53206ccd471"},
]
"aws-cdk.aws-s3" = [
- {file = "aws-cdk.aws-s3-1.115.0.tar.gz", hash = "sha256:73d72900194b944435056faf42c0df21ca7f6a0f941e0bc8d5cdf3de4c0261e9"},
- {file = "aws_cdk.aws_s3-1.115.0-py3-none-any.whl", hash = "sha256:81f85f3c107f05012a351260640a1bb1911106addbd26f2dd2c22d8c44122053"},
+ {file = "aws-cdk.aws-s3-1.124.0.tar.gz", hash = "sha256:3047305a4e013cb796532027c14908003ffe7af95fe8e214e3470a32a11c09e6"},
+ {file = "aws_cdk.aws_s3-1.124.0-py3-none-any.whl", hash = "sha256:0b08821e3b79c26110068f54aabdb938da55b562dcf2a28a7171d930334ce71a"},
]
"aws-cdk.aws-s3-assets" = [
- {file = "aws-cdk.aws-s3-assets-1.115.0.tar.gz", hash = "sha256:4aa793512b08d73f0bacb71f72f607a510672d077216cdd1ac307c65bd0751ae"},
- {file = "aws_cdk.aws_s3_assets-1.115.0-py3-none-any.whl", hash = "sha256:0bb1eea914908a5fc69a505b118e89f7d3097bce309126167b738a0aefd98ec6"},
+ {file = "aws-cdk.aws-s3-assets-1.124.0.tar.gz", hash = "sha256:568d4c598319e3bf1869536be0586b1004d3c43c2133ba94bf9cda4ad4ae5d5d"},
+ {file = "aws_cdk.aws_s3_assets-1.124.0-py3-none-any.whl", hash = "sha256:125c5e3786f2c233512374080553b2a7592efa6a53203764979a1bb987c47338"},
]
"aws-cdk.aws-sam" = [
- {file = "aws-cdk.aws-sam-1.115.0.tar.gz", hash = "sha256:babca8a6fbf68a32ebf6f1fd54f6a7bc506d60dae007fd6e4b06f1637edd42fd"},
- {file = "aws_cdk.aws_sam-1.115.0-py3-none-any.whl", hash = "sha256:ece50ab527eb1e5f84f6de2ad503e7cd61a2351dfcb6446274f8099ffabfcfc5"},
+ {file = "aws-cdk.aws-sam-1.124.0.tar.gz", hash = "sha256:39db01a4d88fd05c57dbc4f0c76c2471eab3e75753febc30f2847c546fa8292b"},
+ {file = "aws_cdk.aws_sam-1.124.0-py3-none-any.whl", hash = "sha256:b1ca75d2fb13898ed66cd4ee364cfa0b4f0924ab4583994ec4a7200d10c8c71b"},
]
"aws-cdk.aws-secretsmanager" = [
- {file = "aws-cdk.aws-secretsmanager-1.115.0.tar.gz", hash = "sha256:6de8204e4bbcbe8df8852646933c1d8d8cb1332374baee9fe780bd2b413e2423"},
- {file = "aws_cdk.aws_secretsmanager-1.115.0-py3-none-any.whl", hash = "sha256:0acf55659f67ac43c69be9a17e40e382d6122abc8055f092332723e07db15fd9"},
+ {file = "aws-cdk.aws-secretsmanager-1.124.0.tar.gz", hash = "sha256:76d3ded9f20d29520d4e54e15c335718cac4f938aacb4827a2a9f98af417576f"},
+ {file = "aws_cdk.aws_secretsmanager-1.124.0-py3-none-any.whl", hash = "sha256:0b6ae44966600943eb66fc48a93a0ae2bac60c8d6a5ff9c687ad9675b9f2bc5f"},
]
"aws-cdk.aws-signer" = [
- {file = "aws-cdk.aws-signer-1.115.0.tar.gz", hash = "sha256:9050e46e059edcde6b8e1d80b0d792eb2b4ad36cc00ce0b284d04a15b019b216"},
- {file = "aws_cdk.aws_signer-1.115.0-py3-none-any.whl", hash = "sha256:3b4b920dd5c8873bb0b60c0d2ae340fad434e7f011296f465d482afc094b25da"},
+ {file = "aws-cdk.aws-signer-1.124.0.tar.gz", hash = "sha256:96dd4ae63b43c7c12fde59f7ebbbea1895964a5f08c6e2ca4a2a1062abcc2399"},
+ {file = "aws_cdk.aws_signer-1.124.0-py3-none-any.whl", hash = "sha256:2fe614e6ce1ea6259d60f3adced41eaefdeace0cf77d961b5fcef815e1f82428"},
+]
+"aws-cdk.aws-sns" = [
+ {file = "aws-cdk.aws-sns-1.124.0.tar.gz", hash = "sha256:21e838c52cdd9bdcd98fc0fbe16ffad2bf10ba6bf31c5bfcdd9f49a8b3479d0c"},
+ {file = "aws_cdk.aws_sns-1.124.0-py3-none-any.whl", hash = "sha256:cb3820fd79643d1c5fb0b69f2b4755900dd16756af0f4c36706d68220a845d8b"},
]
"aws-cdk.aws-sqs" = [
- {file = "aws-cdk.aws-sqs-1.115.0.tar.gz", hash = "sha256:b24e03f0027fd99c6cdfe604e3a2b3d0d203d616dffafc74f74f6715083e2b08"},
- {file = "aws_cdk.aws_sqs-1.115.0-py3-none-any.whl", hash = "sha256:cda589452cb4a6db584050e50f14fbe11757fb0b3aff63f50ae663fad5b7bf27"},
+ {file = "aws-cdk.aws-sqs-1.124.0.tar.gz", hash = "sha256:ffed4754784de29473f554e450c6ec1b96c7508a2706406fe8d6442f2a31c58c"},
+ {file = "aws_cdk.aws_sqs-1.124.0-py3-none-any.whl", hash = "sha256:382721ca5d82dce9ec2625e5bae26132151748ee60e1269a0aa91cfd03227ee7"},
]
"aws-cdk.aws-ssm" = [
- {file = "aws-cdk.aws-ssm-1.115.0.tar.gz", hash = "sha256:960330865ee74485cab510ba1cac5d8d4578e777f1a421b14e8a20895bbe5ac5"},
- {file = "aws_cdk.aws_ssm-1.115.0-py3-none-any.whl", hash = "sha256:4431c43667b57fe2883a9ef022b277cbd3b62f6ab13cb0b1221513f7f76f2aac"},
+ {file = "aws-cdk.aws-ssm-1.124.0.tar.gz", hash = "sha256:bcfc99a5cdf23849503c72d93b9e5734d11976453004f13ebca2a66aeb3df10c"},
+ {file = "aws_cdk.aws_ssm-1.124.0-py3-none-any.whl", hash = "sha256:4d7335c2ce0200c1ed347422139c9d9b07c71297253ba911470114277996cc76"},
]
"aws-cdk.cloud-assembly-schema" = [
- {file = "aws-cdk.cloud-assembly-schema-1.115.0.tar.gz", hash = "sha256:d565a8418e0cc05d3471dd48424477528d72bdd7d17adc9a049068559666a3ae"},
- {file = "aws_cdk.cloud_assembly_schema-1.115.0-py3-none-any.whl", hash = "sha256:0686e6f7e5da48dbd2ff724953d51eb0495b6772bdb17400024bb42e6fe05baf"},
+ {file = "aws-cdk.cloud-assembly-schema-1.124.0.tar.gz", hash = "sha256:d2989a6742ad988fa0f7085ab67fb7ced14f4c3b1a98cc0bf4a0ea1a9358667c"},
+ {file = "aws_cdk.cloud_assembly_schema-1.124.0-py3-none-any.whl", hash = "sha256:77d3f63629b7213c639ffd4c46eb63ce9dd048e9a91a045afa72dcce9576ee6b"},
]
"aws-cdk.core" = [
- {file = "aws-cdk.core-1.115.0.tar.gz", hash = "sha256:42a691cc183219ce76eb58e17507edf768a0f5eca0ea98661b4b1f16f178b90d"},
- {file = "aws_cdk.core-1.115.0-py3-none-any.whl", hash = "sha256:93a8e3d87f79af75866bf3f1cfc702dd5664526ec0f70a1c5f7ade82cb1536b1"},
+ {file = "aws-cdk.core-1.124.0.tar.gz", hash = "sha256:bbdc1cf5affc34d0caa549771dc6b41ce467744f8ca727b215f0d89b853f4f0c"},
+ {file = "aws_cdk.core-1.124.0-py3-none-any.whl", hash = "sha256:56c4549161029c707aa527882e4741fca1ef4c46f63a6417e56e968710cfba7c"},
+]
+"aws-cdk.custom-resources" = [
+ {file = "aws-cdk.custom-resources-1.124.0.tar.gz", hash = "sha256:d2be1a1636b65e275521970b9c9accd02718f678ebb074a580b15b695e4b60d5"},
+ {file = "aws_cdk.custom_resources-1.124.0-py3-none-any.whl", hash = "sha256:6c9abcc046a92dc6845c8a81e33ac727da95e0c0d95b3fba0d433de7dae10a61"},
]
"aws-cdk.cx-api" = [
- {file = "aws-cdk.cx-api-1.115.0.tar.gz", hash = "sha256:10251ef8deaf7acfb7f7356e07c53cd86bbd8725631795e1ce8f8891bcaffad0"},
- {file = "aws_cdk.cx_api-1.115.0-py3-none-any.whl", hash = "sha256:6c03bc14f8d645e63329cb152b2f1fe339a556c297f1c3ecfa75ca9a981f9dca"},
+ {file = "aws-cdk.cx-api-1.124.0.tar.gz", hash = "sha256:b8ad4e1a2a5545dd256b50d36efb6d59b9b89b4b1034e7b7f9edfdaa476b181b"},
+ {file = "aws_cdk.cx_api-1.124.0-py3-none-any.whl", hash = "sha256:64b6f3ba0313cdea9963f9d210932cf770366a9d860520e1f15e64a26e97c5d6"},
]
"aws-cdk.region-info" = [
- {file = "aws-cdk.region-info-1.115.0.tar.gz", hash = "sha256:4f6b282fa495c244c1f96deea4aed77e702312373204e34b3bba53da27851974"},
- {file = "aws_cdk.region_info-1.115.0-py3-none-any.whl", hash = "sha256:b346bdab4bf54a5956fab020bc085b6c2c304f485dd2d09c8fb586728dfe7c11"},
+ {file = "aws-cdk.region-info-1.124.0.tar.gz", hash = "sha256:c28d31226f9000db1375044ea22ba496cc75e8c3db6aa1493a687ff0f89ccdae"},
+ {file = "aws_cdk.region_info-1.124.0-py3-none-any.whl", hash = "sha256:594b5f275766b22864e6111f194cfe7a12713ffc61963d063ce06812fa484728"},
]
cattrs = [
{file = "cattrs-1.0.0-py2.py3-none-any.whl", hash = "sha256:616972ae3dfa6e623a40ad3cb845420e64942989152774ab055e5c2b2f89f997"},
{file = "cattrs-1.0.0.tar.gz", hash = "sha256:b7ab5cf8ad127c42eefd01410c1c6e28569a45a255ea80ed968511873c433c7a"},
- {file = "cattrs-1.6.0-py3-none-any.whl", hash = "sha256:c8de53900e3acad94ca83750eb12bb38aa85ce9114be47177c943e2f0eca63b0"},
- {file = "cattrs-1.6.0.tar.gz", hash = "sha256:3e2cd5dc8a1006d5da53ddcbf4f0b1dd3a21e294323b257678d0a96721f8253a"},
+ {file = "cattrs-1.8.0-py3-none-any.whl", hash = "sha256:901fb2040529ae8fc9d93f48a2cdf7de3e983312ffb2a164ffa4e9847f253af1"},
+ {file = "cattrs-1.8.0.tar.gz", hash = "sha256:5c121ab06a7cac494813c228721a7feb5a6423b17316eeaebf13f5a03e5b0d53"},
]
constructs = [
{file = "constructs-3.3.101-py3-none-any.whl", hash = "sha256:0605ea091dda433f0915ba5b3c74bf967d90fb0cf975a5c3b34a7150a3cf48d1"},
@@ -747,8 +917,8 @@ importlib-resources = [
{file = "importlib_resources-5.2.0.tar.gz", hash = "sha256:22a2c42d8c6a1d30aa8a0e1f57293725bfd5c013d562585e46aff469e0ff78b3"},
]
jsii = [
- {file = "jsii-1.32.0-py3-none-any.whl", hash = "sha256:c71321c4b74ed2c29edc9943c22a36c60a8626df6e0a7173b9ae41366b1a9cb9"},
- {file = "jsii-1.32.0.tar.gz", hash = "sha256:b95e7747812e16cafbfde80b714d9b684c7a4ee57a00cbaf8f138d5868bdb2ae"},
+ {file = "jsii-1.34.0-py3-none-any.whl", hash = "sha256:d0a703d0d44bf78bb90529699599d2a58a68ca764f996808e97eafc68e2467de"},
+ {file = "jsii-1.34.0.tar.gz", hash = "sha256:e72ba5fafabdd5b6a3a65bd2cf42302eb87f2fe7c6339bddb808226a91623654"},
]
publication = [
{file = "publication-0.0.3-py2.py3-none-any.whl", hash = "sha256:0248885351febc11d8a1098d5c8e3ab2dabcf3e8c0c96db1e17ecd12b53afbe6"},
diff --git a/test_infra/pyproject.toml b/test_infra/pyproject.toml
index e6dda67cb..02e0241d8 100644
--- a/test_infra/pyproject.toml
+++ b/test_infra/pyproject.toml
@@ -7,14 +7,15 @@ license = "Apache License 2.0"
[tool.poetry.dependencies]
python = ">=3.6.2, <3.10"
-"aws-cdk.core" = "^1.115.0"
-"aws-cdk.aws-ec2" = "^1.115.0"
-"aws-cdk.aws-glue" = "^1.115.0"
-"aws-cdk.aws-iam" = "^1.115.0"
-"aws-cdk.aws-kms" = "^1.115.0"
-"aws-cdk.aws-logs" = "^1.115.0"
-"aws-cdk.aws-s3" = "^1.115.0"
-"aws-cdk.aws-redshift" = "^1.115.0"
-"aws-cdk.aws-rds" = "^1.115.0"
-"aws-cdk.aws-secretsmanager" = "^1.115.0"
-"aws-cdk.aws-ssm" = "^1.115.0"
+"aws-cdk.core" = "^1.124.0"
+"aws-cdk.aws-ec2" = "^1.124.0"
+"aws-cdk.aws-glue" = "^1.124.0"
+"aws-cdk.aws-iam" = "^1.124.0"
+"aws-cdk.aws-kms" = "^1.124.0"
+"aws-cdk.aws-logs" = "^1.124.0"
+"aws-cdk.aws-s3" = "^1.124.0"
+"aws-cdk.aws-redshift" = "^1.124.0"
+"aws-cdk.aws-rds" = "^1.124.0"
+"aws-cdk.aws-secretsmanager" = "^1.124.0"
+"aws-cdk.aws-ssm" = "^1.124.0"
+"aws-cdk.aws-opensearchservice" = "^1.124.0"
diff --git a/test_infra/scripts/delete-opensearch.sh b/test_infra/scripts/delete-opensearch.sh
new file mode 100755
index 000000000..1c1c01ba2
--- /dev/null
+++ b/test_infra/scripts/delete-opensearch.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+set -e
+
+pushd ..
+cdk destroy aws-data-wrangler-opensearch
+popd
diff --git a/test_infra/scripts/deploy-opensearch.sh b/test_infra/scripts/deploy-opensearch.sh
new file mode 100755
index 000000000..e94818af4
--- /dev/null
+++ b/test_infra/scripts/deploy-opensearch.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+set -e
+
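+# bootstrap is idempotent; deploying the aws-data-wrangler-opensearch stack
+# can take roughly 15 minutes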
+pushd ..
+cdk bootstrap
+cdk deploy aws-data-wrangler-opensearch
+popd
diff --git a/test_infra/stacks/opensearch_stack.py b/test_infra/stacks/opensearch_stack.py
new file mode 100644
index 000000000..f3bc6a1f8
--- /dev/null
+++ b/test_infra/stacks/opensearch_stack.py
@@ -0,0 +1,105 @@
+from aws_cdk import aws_ec2 as ec2
+from aws_cdk import aws_iam as iam
+from aws_cdk import aws_kms as kms
+from aws_cdk import aws_opensearchservice as opensearch
+from aws_cdk import aws_s3 as s3
+from aws_cdk import aws_secretsmanager as secrets
+from aws_cdk import core as cdk
+
+
+def validate_domain_name(name: str) -> None:
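+    # Amazon OpenSearch Service domain names must be 3-28 characters long and
+    # may contain lowercase letters, digits and hyphens only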
+ if not 3 <= len(name) <= 28:
+ raise ValueError(f"invalid domain name ({name}) - bad length ({len(name)})")
+ for c in name:
+        if not ("a" <= c <= "z" or "0" <= c <= "9" or c == "-"):
+ raise ValueError(f'invalid domain name ({name}) - bad character ("{c}")')
+
+
+class OpenSearchStack(cdk.Stack): # type: ignore
+ def __init__(
+ self,
+ scope: cdk.Construct,
+ construct_id: str,
+ vpc: ec2.IVpc,
+ bucket: s3.IBucket,
+ key: kms.Key,
+ **kwargs: str,
+ ) -> None:
+        """
+        AWS Data Wrangler Development OpenSearch Infrastructure.
+
+        Provisions an Amazon OpenSearch Service 1.0 domain and an
+        Elasticsearch 7.10 domain with fine-grained access control.
+        """
+ super().__init__(scope, construct_id, **kwargs)
+
+ self.vpc = vpc
+ self.key = key
+ self.bucket = bucket
+
+ self._set_opensearch_infra()
+ self._setup_opensearch_1_0()
+ self._setup_elasticsearch_7_10_fgac()
+
+ def _set_opensearch_infra(self) -> None:
+ self.username = "test"
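+        # the generated password is stored in Secrets Manager under
+        # "aws-data-wrangler/opensearch_password" so tests can fetch it at runtime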
+ # fmt: off
+ self.password_secret = secrets.Secret(
+ self,
+ "opensearch-password-secret",
+ secret_name="aws-data-wrangler/opensearch_password",
+ generate_secret_string=secrets.SecretStringGenerator(exclude_characters="/@\"\' \\"),
+ ).secret_value
+ # fmt: on
+ self.password = self.password_secret.to_string()
+
+ def _setup_opensearch_1_0(self) -> None:
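+        # a single t3.small.search data node keeps costs low; suitable for
+        # integration tests, not for production workloads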
+ domain_name = "wrangler-os-1-0"
+ validate_domain_name(domain_name)
+ domain_arn = f"arn:aws:es:{self.region}:{self.account}:domain/{domain_name}"
+ domain = opensearch.Domain(
+ self,
+ domain_name,
+ domain_name=domain_name,
+ version=opensearch.EngineVersion.OPENSEARCH_1_0,
+ capacity=opensearch.CapacityConfig(data_node_instance_type="t3.small.search", data_nodes=1),
+ access_policies=[
+ iam.PolicyStatement(
+ effect=iam.Effect.ALLOW,
+ actions=["es:*"],
+ principals=[iam.AccountRootPrincipal()],
+ resources=[f"{domain_arn}/*"],
+ )
+ ],
+ removal_policy=cdk.RemovalPolicy.DESTROY,
+ )
+
+ cdk.CfnOutput(self, f"DomainEndpoint-{domain_name}", value=domain.domain_endpoint)
+
+ def _setup_elasticsearch_7_10_fgac(self) -> None:
+ domain_name = "wrangler-es-7-10-fgac"
+ validate_domain_name(domain_name)
+ domain_arn = f"arn:aws:es:{self.region}:{self.account}:domain/{domain_name}"
+ domain = opensearch.Domain(
+ self,
+ domain_name,
+ domain_name=domain_name,
+ version=opensearch.EngineVersion.ELASTICSEARCH_7_10,
+ capacity=opensearch.CapacityConfig(data_node_instance_type="t3.small.search", data_nodes=1),
+ access_policies=[
+ iam.PolicyStatement(
+ effect=iam.Effect.ALLOW,
+ actions=["es:*"],
+                    principals=[iam.AnyPrincipal()],  # open resource policy; access is restricted by FGAC
+ resources=[f"{domain_arn}/*"],
+ )
+ ],
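+            # fine-grained access control requires node-to-node encryption,
+            # encryption at rest and enforced HTTPS (all enabled below)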
+ fine_grained_access_control=opensearch.AdvancedSecurityOptions(
+ master_user_name=self.username,
+ master_user_password=self.password_secret,
+ ),
+ node_to_node_encryption=True,
+ encryption_at_rest=opensearch.EncryptionAtRestOptions(enabled=True, kms_key=self.key),
+ enforce_https=True,
+ removal_policy=cdk.RemovalPolicy.DESTROY,
+ )
+
+ cdk.CfnOutput(self, f"DomainEndpoint-{domain_name}", value=domain.domain_endpoint)
diff --git a/tests/_utils.py b/tests/_utils.py
index 85df69484..5f74c4e83 100644
--- a/tests/_utils.py
+++ b/tests/_utils.py
@@ -528,9 +528,10 @@ def extract_cloudformation_outputs():
client = boto3.client("cloudformation")
response = try_it(client.describe_stacks, botocore.exceptions.ClientError, max_num_tries=5)
for stack in response.get("Stacks"):
- if (stack["StackName"] in ["aws-data-wrangler-base", "aws-data-wrangler-databases"]) and (
- stack["StackStatus"] in CFN_VALID_STATUS
- ):
+ if (
+ stack["StackName"]
+ in ["aws-data-wrangler-base", "aws-data-wrangler-databases", "aws-data-wrangler-opensearch"]
+ ) and (stack["StackStatus"] in CFN_VALID_STATUS):
for output in stack.get("Outputs"):
outputs[output.get("OutputKey")] = output.get("OutputValue")
return outputs
diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py
new file mode 100644
index 000000000..345d248e3
--- /dev/null
+++ b/tests/test_opensearch.py
@@ -0,0 +1,358 @@
+import json
+import logging
+import tempfile
+import time
+
+import boto3
+import pandas as pd
+import pytest # type: ignore
+
+import awswrangler as wr
+
+from ._utils import extract_cloudformation_outputs
+
+logging.getLogger("awswrangler").setLevel(logging.DEBUG)
+
+
+inspections_documents = [
+ {
+ "business_address": "315 California St",
+ "business_city": "San Francisco",
+ "business_id": "24936",
+ "business_latitude": "37.793199",
+ "business_location": {"lon": -122.400152, "lat": 37.793199},
+ "business_longitude": "-122.400152",
+ "business_name": "San Francisco Soup Company",
+ "business_postal_code": "94104",
+ "business_state": "CA",
+ "inspection_date": "2016-06-09T00:00:00.000",
+ "inspection_id": "24936_20160609",
+ "inspection_score": 77,
+ "inspection_type": "Routine - Unscheduled",
+ "risk_category": "Low Risk",
+ "violation_description": "Improper food labeling or menu misrepresentation",
+ "violation_id": "24936_20160609_103141",
+ },
+ {
+ "business_address": "10 Mason St",
+ "business_city": "San Francisco",
+ "business_id": "60354",
+ "business_latitude": "37.783527",
+ "business_location": {"lon": -122.409061, "lat": 37.783527},
+ "business_longitude": "-122.409061",
+ "business_name": "Soup Unlimited",
+ "business_postal_code": "94102",
+ "business_state": "CA",
+ "inspection_date": "2016-11-23T00:00:00.000",
+ "inspection_id": "60354_20161123",
+ "inspection_type": "Routine",
+ "inspection_score": 95,
+ },
+ {
+ "business_address": "2872 24th St",
+ "business_city": "San Francisco",
+ "business_id": "1797",
+ "business_latitude": "37.752807",
+ "business_location": {"lon": -122.409752, "lat": 37.752807},
+ "business_longitude": "-122.409752",
+ "business_name": "TIO CHILOS GRILL",
+ "business_postal_code": "94110",
+ "business_state": "CA",
+ "inspection_date": "2016-07-05T00:00:00.000",
+ "inspection_id": "1797_20160705",
+ "inspection_score": 90,
+ "inspection_type": "Routine - Unscheduled",
+ "risk_category": "Low Risk",
+ "violation_description": "Unclean nonfood contact surfaces",
+ "violation_id": "1797_20160705_103142",
+ },
+ {
+ "business_address": "1661 Tennessee St Suite 3B",
+ "business_city": "San Francisco Whard Restaurant",
+ "business_id": "66198",
+ "business_latitude": "37.75072",
+ "business_location": {"lon": -122.388478, "lat": 37.75072},
+ "business_longitude": "-122.388478",
+ "business_name": "San Francisco Restaurant",
+ "business_postal_code": "94107",
+ "business_state": "CA",
+ "inspection_date": "2016-05-27T00:00:00.000",
+ "inspection_id": "66198_20160527",
+ "inspection_type": "Routine",
+ "inspection_score": 56,
+ },
+ {
+ "business_address": "2162 24th Ave",
+ "business_city": "San Francisco",
+ "business_id": "5794",
+ "business_latitude": "37.747228",
+ "business_location": {"lon": -122.481299, "lat": 37.747228},
+ "business_longitude": "-122.481299",
+ "business_name": "Soup House",
+ "business_phone_number": "+14155752700",
+ "business_postal_code": "94116",
+ "business_state": "CA",
+ "inspection_date": "2016-09-07T00:00:00.000",
+ "inspection_id": "5794_20160907",
+ "inspection_score": 96,
+ "inspection_type": "Routine - Unscheduled",
+ "risk_category": "Low Risk",
+ "violation_description": "Unapproved or unmaintained equipment or utensils",
+ "violation_id": "5794_20160907_103144",
+ },
+ {
+ "business_address": "2162 24th Ave",
+ "business_city": "San Francisco",
+ "business_id": "5794",
+ "business_latitude": "37.747228",
+ "business_location": {"lon": -122.481299, "lat": 37.747228},
+ "business_longitude": "-122.481299",
+ "business_name": "Soup-or-Salad",
+ "business_phone_number": "+14155752700",
+ "business_postal_code": "94116",
+ "business_state": "CA",
+ "inspection_date": "2016-09-07T00:00:00.000",
+ "inspection_id": "5794_20160907",
+ "inspection_score": 96,
+ "inspection_type": "Routine - Unscheduled",
+ "risk_category": "Low Risk",
+ "violation_description": "Unapproved or unmaintained equipment or utensils",
+ "violation_id": "5794_20160907_103144",
+ },
+]
+
+
+@pytest.fixture(scope="session")
+def cloudformation_outputs():
+ return extract_cloudformation_outputs()
+
+
+@pytest.fixture(scope="session")
+def opensearch_password():
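+    # password auto-generated by the OpenSearch test stack (test_infra/stacks/opensearch_stack.py)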
+ return boto3.client("secretsmanager").get_secret_value(SecretId="aws-data-wrangler/opensearch_password")[
+ "SecretString"
+ ]
+
+
+@pytest.fixture(scope="session")
+def domain_endpoint_opensearch_1_0(cloudformation_outputs):
+ return cloudformation_outputs["DomainEndpointwrangleros10"]
+
+
+@pytest.fixture(scope="session")
+def domain_endpoint_elasticsearch_7_10_fgac(cloudformation_outputs):
+ return cloudformation_outputs["DomainEndpointwrangleres710fgac"]
+
+
+def test_connection_opensearch_1_0(domain_endpoint_opensearch_1_0):
+ client = wr.opensearch.connect(host=domain_endpoint_opensearch_1_0)
+ print(client.info())
+ assert len(client.info()) > 0
+
+
+def test_connection_opensearch_1_0_https(domain_endpoint_opensearch_1_0):
+ client = wr.opensearch.connect(host=f"https://{domain_endpoint_opensearch_1_0}")
+ print(client.info())
+ assert len(client.info()) > 0
+
+
+def test_connection_elasticsearch_7_10_fgac(domain_endpoint_elasticsearch_7_10_fgac, opensearch_password):
+ client = wr.opensearch.connect(
+ host=domain_endpoint_elasticsearch_7_10_fgac, username="test", password=opensearch_password
+ )
+ print(client.info())
+ assert len(client.info()) > 0
+
+
+@pytest.fixture(scope="session")
+def opensearch_1_0_client(domain_endpoint_opensearch_1_0):
+ client = wr.opensearch.connect(host=domain_endpoint_opensearch_1_0)
+ return client
+
+
+@pytest.fixture(scope="session")
+def elasticsearch_7_10_fgac_client(domain_endpoint_elasticsearch_7_10_fgac, opensearch_password):
+ client = wr.opensearch.connect(
+ host=domain_endpoint_elasticsearch_7_10_fgac, username="test", password=opensearch_password
+ )
+ return client
+
+
+# testing multiple versions
+@pytest.fixture(params=["opensearch_1_0_client", "elasticsearch_7_10_fgac_client"])
+def client(request):
+ return request.getfixturevalue(request.param)
+
+
+def test_create_index(client):
+ index = "test_create_index"
+ wr.opensearch.delete_index(client, index)
+ time.sleep(0.5) # let the cluster clean up
+ response = wr.opensearch.create_index(
+ client=client,
+ index=index,
+ mappings={"properties": {"name": {"type": "text"}, "age": {"type": "integer"}}},
+ settings={"index": {"number_of_shards": 1, "number_of_replicas": 1}},
+ )
+ assert response.get("acknowledged", False) is True
+
+
+def test_delete_index(client):
+ index = "test_delete_index"
+ wr.opensearch.create_index(client, index=index)
+ response = wr.opensearch.delete_index(client, index=index)
+ print(response)
+ assert response.get("acknowledged", False) is True
+
+
+def test_index_df(client):
+ response = wr.opensearch.index_df(
+ client,
+ df=pd.DataFrame([{"_id": "1", "name": "John"}, {"_id": "2", "name": "George"}, {"_id": "3", "name": "Julia"}]),
+ index="test_index_df1",
+ )
+ print(response)
+ assert response.get("success", 0) == 3
+
+
+def test_index_documents(client):
+ response = wr.opensearch.index_documents(
+ client,
+ documents=[{"_id": "1", "name": "John"}, {"_id": "2", "name": "George"}, {"_id": "3", "name": "Julia"}],
+ index="test_index_documents1",
+ )
+ print(response)
+ assert response.get("success", 0) == 3
+
+
+def test_index_documents_id_keys(client):
+ response = wr.opensearch.index_documents(
+ client, documents=inspections_documents, index="test_index_documents_id_keys", id_keys=["inspection_id"]
+ )
+    print(response)
+    assert response.get("success", 0) == 6
+
+
+def test_index_documents_no_id_keys(client):
+ response = wr.opensearch.index_documents(
+ client, documents=inspections_documents, index="test_index_documents_no_id_keys"
+ )
+    print(response)
+    assert response.get("success", 0) == 6
+
+
+def test_search(client):
+ index = "test_search"
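+    # refresh="wait_for" blocks until the indexed documents are visible to
+    # search, avoiding a race between indexing and the query below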
+ wr.opensearch.index_documents(
+ client, documents=inspections_documents, index=index, id_keys=["inspection_id"], refresh="wait_for"
+ )
+ df = wr.opensearch.search(
+ client,
+ index=index,
+ search_body={"query": {"match": {"business_name": "soup"}}},
+ _source=["inspection_id", "business_name", "business_location"],
+ )
+
+ print("")
+ print(df.to_string())
+ assert df.shape[0] == 3
+
+
+def test_search_filter_path(client):
+ index = "test_search"
+ wr.opensearch.index_documents(
+ client, documents=inspections_documents, index=index, id_keys=["inspection_id"], refresh="wait_for"
+ )
+ df = wr.opensearch.search(
+ client,
+ index=index,
+ search_body={"query": {"match": {"business_name": "soup"}}},
+ _source=["inspection_id", "business_name", "business_location"],
+ filter_path=["hits.hits._source"],
+ )
+
+ print("")
+ print(df.to_string())
+ assert df.shape[0] == 3
+
+
+def test_search_scroll(client):
+ index = "test_search_scroll"
+ wr.opensearch.index_documents(
+ client, documents=inspections_documents, index=index, id_keys=["inspection_id"], refresh="wait_for"
+ )
+ df = wr.opensearch.search(
+ client, index=index, is_scroll=True, _source=["inspection_id", "business_name", "business_location"]
+ )
+
+ print("")
+ print(df.to_string())
+ assert df.shape[0] == 5
+
+
+def test_search_sql(client):
+ index = "test_search_sql"
+ wr.opensearch.index_documents(
+ client, documents=inspections_documents, index=index, id_keys=["inspection_id"], refresh="wait_for"
+ )
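+    # search_by_sql routes the query through the domain's SQL plugin; the
+    # endpoint differs between the OpenSearch and Elasticsearch distributions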
+ df = wr.opensearch.search_by_sql(client, sql_query=f"select * from {index}")
+
+ print("")
+ print(df.to_string())
+ assert df.shape[0] == 5
+
+
+def test_index_json_local(client):
+ file_path = f"{tempfile.gettempdir()}/inspections.json"
+ with open(file_path, "w") as filehandle:
+ for doc in inspections_documents:
+ filehandle.write("%s\n" % json.dumps(doc))
+ response = wr.opensearch.index_json(client, index="test_index_json_local", path=file_path)
+ print(response)
+ assert response.get("success", 0) == 6
+
+
+def test_index_json_s3(client, path):
+ file_path = f"{tempfile.gettempdir()}/inspections.json"
+ with open(file_path, "w") as filehandle:
+ for doc in inspections_documents:
+ filehandle.write("%s\n" % json.dumps(doc))
+ s3 = boto3.client("s3")
+ path = f"{path}opensearch/inspections.json"
+ bucket, key = wr._utils.parse_path(path)
+ s3.upload_file(file_path, bucket, key)
+ response = wr.opensearch.index_json(client, index="test_index_json_s3", path=path)
+ print(response)
+ assert response.get("success", 0) == 6
+
+
+def test_index_csv_local(client):
+ file_path = f"{tempfile.gettempdir()}/inspections.csv"
+ index = "test_index_csv_local"
+ df = pd.DataFrame(inspections_documents)
+ df.to_csv(file_path, index=False)
+ response = wr.opensearch.index_csv(client, path=file_path, index=index)
+ print(response)
+ assert response.get("success", 0) == 6
+
+
+def test_index_csv_s3(client, path):
+ file_path = f"{tempfile.gettempdir()}/inspections.csv"
+ index = "test_index_csv_s3"
+ df = pd.DataFrame(inspections_documents)
+ df.to_csv(file_path, index=False)
+ s3 = boto3.client("s3")
+ path = f"{path}opensearch/inspections.csv"
+ bucket, key = wr._utils.parse_path(path)
+ s3.upload_file(file_path, bucket, key)
+ response = wr.opensearch.index_csv(client, path=path, index=index)
+ print(response)
+ assert response.get("success", 0) == 6
+
+
+@pytest.mark.skip(reason="takes a long time (~5 mins) since testing against small clusters")
+def test_index_json_s3_large_file(client):
+ path = "s3://irs-form-990/index_2011.json"
+ response = wr.opensearch.index_json(
+ client, index="test_index_json_s3_large_file", path=path, json_path="Filings2011", id_keys=["EIN"], bulk_size=20
+ )
+ print(response)
+ assert response.get("success", 0) > 0
diff --git a/tutorials/031 - OpenSearch.ipynb b/tutorials/031 - OpenSearch.ipynb
new file mode 100644
index 000000000..afe254669
--- /dev/null
+++ b/tutorials/031 - OpenSearch.ipynb
@@ -0,0 +1,1668 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "[AWS Data Wrangler](https://github.com/awslabs/aws-data-wrangler)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 31 - OpenSearch"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "## Table of Contents\n",
+    "* [1. Initialize](#initialize)\n",
+    "    * [Connect to your Amazon OpenSearch domain](#connect)\n",
+    "    * [Enter your bucket name](#bucket)\n",
+    "    * [Initialize sample data](#sample-data)\n",
+    "* [2. Indexing (load)](#indexing)\n",
+    "    * [Index documents (no Pandas)](#index-documents)\n",
+    "    * [Index JSON file](#index-json)\n",
+    "    * [Index CSV](#index-csv)\n",
+    "* [3. Search](#search)\n",
+    "    * [3.1 Search by DSL](#search-dsl)\n",
+    "    * [3.2 Search by SQL](#search-sql)\n",
+    "* [4. Delete Indices](#delete-index)\n",
+    "* [5. Bonus - Prepare data and index from DataFrame](#bonus)\n",
+    "    * [Prepare the data for indexing](#prepare-data)\n",
+    "    * [Create index with mapping](#create-index-w-mapping)\n",
+    "    * [Index dataframe](#index-df)\n",
+    "    * [Execute geo query](#search-geo)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1. Initialize"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import awswrangler as wr"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Connect to your Amazon OpenSearch domain"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
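+    "# if username/password are omitted, requests are signed with your boto3 (IAM) credentials\n",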
+ "client = wr.opensearch.connect(\n",
+ " host='OPENSEARCH-ENDPOINT',\n",
+ "# username='FGAC-USERNAME(OPTIONAL)',\n",
+ "# password='FGAC-PASSWORD(OPTIONAL)'\n",
+ ")\n",
+ "client.info()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Enter your bucket name"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "bucket = 'BUCKET'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Initialize sample data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sf_restaurants_inspections = [\n",
+ " {\n",
+ " \"inspection_id\": \"24936_20160609\",\n",
+ " \"business_address\": \"315 California St\",\n",
+ " \"business_city\": \"San Francisco\",\n",
+ " \"business_id\": \"24936\",\n",
+ " \"business_location\": {\"lon\": -122.400152, \"lat\": 37.793199},\n",
+ " \"business_name\": \"San Francisco Soup Company\",\n",
+ " \"business_postal_code\": \"94104\",\n",
+ " \"business_state\": \"CA\",\n",
+ " \"inspection_date\": \"2016-06-09T00:00:00.000\",\n",
+ " \"inspection_score\": 77,\n",
+ " \"inspection_type\": \"Routine - Unscheduled\",\n",
+ " \"risk_category\": \"Low Risk\",\n",
+ " \"violation_description\": \"Improper food labeling or menu misrepresentation\",\n",
+ " \"violation_id\": \"24936_20160609_103141\",\n",
+ " },\n",
+ " {\n",
+ " \"inspection_id\": \"60354_20161123\",\n",
+ " \"business_address\": \"10 Mason St\",\n",
+ " \"business_city\": \"San Francisco\",\n",
+ " \"business_id\": \"60354\",\n",
+ " \"business_location\": {\"lon\": -122.409061, \"lat\": 37.783527},\n",
+ " \"business_name\": \"Soup Unlimited\",\n",
+ " \"business_postal_code\": \"94102\",\n",
+ " \"business_state\": \"CA\",\n",
+ " \"inspection_date\": \"2016-11-23T00:00:00.000\",\n",
+ " \"inspection_type\": \"Routine\",\n",
+ " \"inspection_score\": 95,\n",
+ " },\n",
+ " {\n",
+ " \"inspection_id\": \"1797_20160705\",\n",
+ " \"business_address\": \"2872 24th St\",\n",
+ " \"business_city\": \"San Francisco\",\n",
+ " \"business_id\": \"1797\",\n",
+ " \"business_location\": {\"lon\": -122.409752, \"lat\": 37.752807},\n",
+ " \"business_name\": \"TIO CHILOS GRILL\",\n",
+ " \"business_postal_code\": \"94110\",\n",
+ " \"business_state\": \"CA\",\n",
+ " \"inspection_date\": \"2016-07-05T00:00:00.000\",\n",
+ " \"inspection_score\": 90,\n",
+ " \"inspection_type\": \"Routine - Unscheduled\",\n",
+ " \"risk_category\": \"Low Risk\",\n",
+ " \"violation_description\": \"Unclean nonfood contact surfaces\",\n",
+ " \"violation_id\": \"1797_20160705_103142\",\n",
+ " },\n",
+ " {\n",
+ " \"inspection_id\": \"66198_20160527\",\n",
+ " \"business_address\": \"1661 Tennessee St Suite 3B\",\n",
+ " \"business_city\": \"San Francisco Whard Restaurant\",\n",
+ " \"business_id\": \"66198\",\n",
+ " \"business_location\": {\"lon\": -122.388478, \"lat\": 37.75072},\n",
+ " \"business_name\": \"San Francisco Restaurant\",\n",
+ " \"business_postal_code\": \"94107\",\n",
+ " \"business_state\": \"CA\",\n",
+ " \"inspection_date\": \"2016-05-27T00:00:00.000\",\n",
+ " \"inspection_type\": \"Routine\",\n",
+ " \"inspection_score\": 56,\n",
+ " },\n",
+ " {\n",
+ " \"inspection_id\": \"5794_20160907\",\n",
+ " \"business_address\": \"2162 24th Ave\",\n",
+ " \"business_city\": \"San Francisco\",\n",
+ " \"business_id\": \"5794\",\n",
+ " \"business_location\": {\"lon\": -122.481299, \"lat\": 37.747228},\n",
+ " \"business_name\": \"Soup House\",\n",
+ " \"business_phone_number\": \"+14155752700\",\n",
+ " \"business_postal_code\": \"94116\",\n",
+ " \"business_state\": \"CA\",\n",
+ " \"inspection_date\": \"2016-09-07T00:00:00.000\",\n",
+ " \"inspection_score\": 96,\n",
+ " \"inspection_type\": \"Routine - Unscheduled\",\n",
+ " \"risk_category\": \"Low Risk\",\n",
+ " \"violation_description\": \"Unapproved or unmaintained equipment or utensils\",\n",
+ " \"violation_id\": \"5794_20160907_103144\",\n",
+ " },\n",
+ " \n",
+ " # duplicate record\n",
+ " {\n",
+ " \"inspection_id\": \"5794_20160907\",\n",
+ " \"business_address\": \"2162 24th Ave\",\n",
+ " \"business_city\": \"San Francisco\",\n",
+ " \"business_id\": \"5794\",\n",
+ " \"business_location\": {\"lon\": -122.481299, \"lat\": 37.747228},\n",
+ " \"business_name\": \"Soup-or-Salad\",\n",
+ " \"business_phone_number\": \"+14155752700\",\n",
+ " \"business_postal_code\": \"94116\",\n",
+ " \"business_state\": \"CA\",\n",
+ " \"inspection_date\": \"2016-09-07T00:00:00.000\",\n",
+ " \"inspection_score\": 96,\n",
+ " \"inspection_type\": \"Routine - Unscheduled\",\n",
+ " \"risk_category\": \"Low Risk\",\n",
+ " \"violation_description\": \"Unapproved or unmaintained equipment or utensils\",\n",
+ " \"violation_id\": \"5794_20160907_103144\",\n",
+ " },\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2. Indexing (load)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Index documents (no Pandas)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Indexing: 100% (6/6)|####################################|Elapsed Time: 0:00:01"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "{'success': 6, 'errors': []}"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# index documents w/o providing keys (_id is auto-generated)\n",
+ "wr.opensearch.index_documents(\n",
+ " client,\n",
+ " documents=sf_restaurants_inspections,\n",
+ " index=\"sf_restaurants_inspections\" \n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " _id business_name \\\n",
+ "0 663dd72d-0da4-495b-b0ae-ed000105ae73 TIO CHILOS GRILL \n",
+ "1 ff2f50f6-5415-4706-9bcb-af7c5eb0afa3 Soup House \n",
+ "2 b9e8f6a2-8fd1-4660-b041-2997a1a80984 San Francisco Soup Company \n",
+ "3 56b352e6-102b-4eff-8296-7e1fb2459bab Soup Unlimited \n",
+ "4 6fec5411-f79a-48e4-be7b-e0e44d5ebbab San Francisco Restaurant \n",
+ "5 7ba4fb17-f9a9-49da-b90e-8b3553d6d97c Soup-or-Salad \n",
+ "\n",
+ " inspection_id business_location.lon business_location.lat \n",
+ "0 1797_20160705 -122.409752 37.752807 \n",
+ "1 5794_20160907 -122.481299 37.747228 \n",
+ "2 24936_20160609 -122.400152 37.793199 \n",
+ "3 60354_20161123 -122.409061 37.783527 \n",
+ "4 66198_20160527 -122.388478 37.750720 \n",
+ "5 5794_20160907 -122.481299 37.747228 "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "# read all documents. There are 6 documents in total\n",
+ "wr.opensearch.search(\n",
+ " client,\n",
+ " index=\"sf_restaurants_inspections\",\n",
+ " _source=[\"inspection_id\", \"business_name\", \"business_location\"]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "### Index JSON file"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "df = pd.DataFrame(sf_restaurants_inspections)\n",
+ "path = f\"s3://{bucket}/json/sf_restaurants_inspections.json\"\n",
+    "wr.s3.to_json(df, path, orient='records', lines=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Indexing: 100% (6/6)|####################################|Elapsed Time: 0:00:00"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "{'success': 6, 'errors': []}"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "# index json w/ document keys provided (deduplicates by inspection_id)\n",
+ "wr.opensearch.index_json(\n",
+ " client,\n",
+ " path=path, # path can be s3 or local\n",
+ " index=\"sf_restaurants_inspections_dedup\",\n",
+ " id_keys=[\"inspection_id\"] # can be multiple fields. arg applicable to all index_* functions\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " _id business_name inspection_id \\\n",
+ "0 24936_20160609 San Francisco Soup Company 24936_20160609 \n",
+ "1 66198_20160527 San Francisco Restaurant 66198_20160527 \n",
+ "2 5794_20160907 Soup-or-Salad 5794_20160907 \n",
+ "3 60354_20161123 Soup Unlimited 60354_20161123 \n",
+ "4 1797_20160705 TIO CHILOS GRILL 1797_20160705 \n",
+ "\n",
+ " business_location.lon business_location.lat \n",
+ "0 -122.400152 37.793199 \n",
+ "1 -122.388478 37.750720 \n",
+ "2 -122.481299 37.747228 \n",
+ "3 -122.409061 37.783527 \n",
+ "4 -122.409752 37.752807 "
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# now there are no duplicates. There are total 5 documents\n",
+ "wr.opensearch.search(\n",
+ " client,\n",
+ " index=\"sf_restaurants_inspections_dedup\",\n",
+ " _source=[\"inspection_id\", \"business_name\", \"business_location\"]\n",
+ " )"
+ ]
+ },
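+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The same records can also be indexed straight from in-memory Python objects. A minimal sketch (not part of the original walkthrough), assuming `index_documents` accepts an iterable of dicts:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# hedged sketch: index the in-memory records directly with index_documents\n",
+ "wr.opensearch.index_documents(\n",
+ "    client,\n",
+ "    documents=sf_restaurants_inspections,  # the list loaded earlier in this tutorial\n",
+ "    index=\"sf_restaurants_inspections_dedup\",\n",
+ "    id_keys=[\"inspection_id\"]\n",
+ ")"
+ ]
+ },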
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Index CSV"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Indexing: 100% (1000/1000)|##############################|Elapsed Time: 0:00:00"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "{'success': 1000, 'errors': []}"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "wr.opensearch.index_csv(\n",
+ " client, \n",
+ " index=\"nyc_restaurants_inspections_sample\", \n",
+ " path='https://data.cityofnewyork.us/api/views/43nn-pn8j/rows.csv?accessType=DOWNLOAD', # index_csv supports local, s3 and url path\n",
+ " id_keys=[\"CAMIS\"],\n",
+ " pandas_kwargs={'na_filter': True, 'nrows': 1000}, # pandas.read_csv() args - https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html\n",
+ " bulk_size=500 # modify based on your cluster size\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " _id | \n",
+ " CAMIS | \n",
+ " DBA | \n",
+ " BORO | \n",
+ " BUILDING | \n",
+ " STREET | \n",
+ " ZIPCODE | \n",
+ " PHONE | \n",
+ " CUISINE DESCRIPTION | \n",
+ " INSPECTION DATE | \n",
+ " ... | \n",
+ " RECORD DATE | \n",
+ " INSPECTION TYPE | \n",
+ " Latitude | \n",
+ " Longitude | \n",
+ " Community Board | \n",
+ " Council District | \n",
+ " Census Tract | \n",
+ " BIN | \n",
+ " BBL | \n",
+ " NTA | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 41610426 | \n",
+ " 41610426 | \n",
+ " GLOW THAI RESTAURANT | \n",
+ " Brooklyn | \n",
+ " 7107 | \n",
+ " 3 AVENUE | \n",
+ " 11209.0 | \n",
+ " 7187481920 | \n",
+ " Thai | \n",
+ " 02/26/2020 | \n",
+ " ... | \n",
+ " 10/04/2021 | \n",
+ " Cycle Inspection / Re-inspection | \n",
+ " 40.633865 | \n",
+ " -74.026798 | \n",
+ " 310.0 | \n",
+ " 43.0 | \n",
+ " 6800.0 | \n",
+ " 3146519.0 | \n",
+ " 3.058910e+09 | \n",
+ " BK31 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 40811162 | \n",
+ " 40811162 | \n",
+ " CARMINE'S | \n",
+ " Manhattan | \n",
+ " 2450 | \n",
+ " BROADWAY | \n",
+ " 10024.0 | \n",
+ " 2123622200 | \n",
+ " Italian | \n",
+ " 05/28/2019 | \n",
+ " ... | \n",
+ " 10/04/2021 | \n",
+ " Cycle Inspection / Initial Inspection | \n",
+ " 40.791168 | \n",
+ " -73.974308 | \n",
+ " 107.0 | \n",
+ " 6.0 | \n",
+ " 17900.0 | \n",
+ " 1033560.0 | \n",
+ " 1.012380e+09 | \n",
+ " MN12 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 50012113 | \n",
+ " 50012113 | \n",
+ " TANG | \n",
+ " Queens | \n",
+ " 196-50 | \n",
+ " NORTHERN BOULEVARD | \n",
+ " 11358.0 | \n",
+ " 7182797080 | \n",
+ " Korean | \n",
+ " 08/16/2018 | \n",
+ " ... | \n",
+ " 10/04/2021 | \n",
+ " Cycle Inspection / Initial Inspection | \n",
+ " 40.757850 | \n",
+ " -73.784593 | \n",
+ " 411.0 | \n",
+ " 19.0 | \n",
+ " 145101.0 | \n",
+ " 4124565.0 | \n",
+ " 4.055200e+09 | \n",
+ " QN48 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 50014618 | \n",
+ " 50014618 | \n",
+ " TOTTO RAMEN | \n",
+ " Manhattan | \n",
+ " 248 | \n",
+ " EAST 52 STREET | \n",
+ " 10022.0 | \n",
+ " 2124210052 | \n",
+ " Japanese | \n",
+ " 08/20/2018 | \n",
+ " ... | \n",
+ " 10/04/2021 | \n",
+ " Cycle Inspection / Re-inspection | \n",
+ " 40.756596 | \n",
+ " -73.968749 | \n",
+ " 106.0 | \n",
+ " 4.0 | \n",
+ " 9800.0 | \n",
+ " 1038490.0 | \n",
+ " 1.013250e+09 | \n",
+ " MN19 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 50045782 | \n",
+ " 50045782 | \n",
+ " OLLIE'S CHINESE RESTAURANT | \n",
+ " Manhattan | \n",
+ " 2705 | \n",
+ " BROADWAY | \n",
+ " 10025.0 | \n",
+ " 2129323300 | \n",
+ " Chinese | \n",
+ " 10/21/2019 | \n",
+ " ... | \n",
+ " 10/04/2021 | \n",
+ " Cycle Inspection / Re-inspection | \n",
+ " 40.799318 | \n",
+ " -73.968440 | \n",
+ " 107.0 | \n",
+ " 6.0 | \n",
+ " 19100.0 | \n",
+ " 1056562.0 | \n",
+ " 1.018750e+09 | \n",
+ " MN12 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 27 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " _id CAMIS DBA BORO BUILDING \\\n",
+ "0 41610426 41610426 GLOW THAI RESTAURANT Brooklyn 7107 \n",
+ "1 40811162 40811162 CARMINE'S Manhattan 2450 \n",
+ "2 50012113 50012113 TANG Queens 196-50 \n",
+ "3 50014618 50014618 TOTTO RAMEN Manhattan 248 \n",
+ "4 50045782 50045782 OLLIE'S CHINESE RESTAURANT Manhattan 2705 \n",
+ "\n",
+ " STREET ZIPCODE PHONE CUISINE DESCRIPTION \\\n",
+ "0 3 AVENUE 11209.0 7187481920 Thai \n",
+ "1 BROADWAY 10024.0 2123622200 Italian \n",
+ "2 NORTHERN BOULEVARD 11358.0 7182797080 Korean \n",
+ "3 EAST 52 STREET 10022.0 2124210052 Japanese \n",
+ "4 BROADWAY 10025.0 2129323300 Chinese \n",
+ "\n",
+ " INSPECTION DATE ... RECORD DATE INSPECTION TYPE \\\n",
+ "0 02/26/2020 ... 10/04/2021 Cycle Inspection / Re-inspection \n",
+ "1 05/28/2019 ... 10/04/2021 Cycle Inspection / Initial Inspection \n",
+ "2 08/16/2018 ... 10/04/2021 Cycle Inspection / Initial Inspection \n",
+ "3 08/20/2018 ... 10/04/2021 Cycle Inspection / Re-inspection \n",
+ "4 10/21/2019 ... 10/04/2021 Cycle Inspection / Re-inspection \n",
+ "\n",
+ " Latitude Longitude Community Board Council District Census Tract \\\n",
+ "0 40.633865 -74.026798 310.0 43.0 6800.0 \n",
+ "1 40.791168 -73.974308 107.0 6.0 17900.0 \n",
+ "2 40.757850 -73.784593 411.0 19.0 145101.0 \n",
+ "3 40.756596 -73.968749 106.0 4.0 9800.0 \n",
+ "4 40.799318 -73.968440 107.0 6.0 19100.0 \n",
+ "\n",
+ " BIN BBL NTA \n",
+ "0 3146519.0 3.058910e+09 BK31 \n",
+ "1 1033560.0 1.012380e+09 MN12 \n",
+ "2 4124565.0 4.055200e+09 QN48 \n",
+ "3 1038490.0 1.013250e+09 MN19 \n",
+ "4 1056562.0 1.018750e+09 MN12 \n",
+ "\n",
+ "[5 rows x 27 columns]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "wr.opensearch.search(\n",
+ " client,\n",
+ " index=\"nyc_restaurants_inspections_sample\",\n",
+ " size=5\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3. Search\n",
+ "#### Search results are returned as Pandas DataFrame"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.1 Search by DSL"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " _id | \n",
+ " business_name | \n",
+ " inspection_id | \n",
+ " business_location.lon | \n",
+ " business_location.lat | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " ff2f50f6-5415-4706-9bcb-af7c5eb0afa3 | \n",
+ " Soup House | \n",
+ " 5794_20160907 | \n",
+ " -122.481299 | \n",
+ " 37.747228 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 7ba4fb17-f9a9-49da-b90e-8b3553d6d97c | \n",
+ " Soup-or-Salad | \n",
+ " 5794_20160907 | \n",
+ " -122.481299 | \n",
+ " 37.747228 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " b9e8f6a2-8fd1-4660-b041-2997a1a80984 | \n",
+ " San Francisco Soup Company | \n",
+ " 24936_20160609 | \n",
+ " -122.400152 | \n",
+ " 37.793199 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 56b352e6-102b-4eff-8296-7e1fb2459bab | \n",
+ " Soup Unlimited | \n",
+ " 60354_20161123 | \n",
+ " -122.409061 | \n",
+ " 37.783527 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " _id business_name \\\n",
+ "0 ff2f50f6-5415-4706-9bcb-af7c5eb0afa3 Soup House \n",
+ "1 7ba4fb17-f9a9-49da-b90e-8b3553d6d97c Soup-or-Salad \n",
+ "2 b9e8f6a2-8fd1-4660-b041-2997a1a80984 San Francisco Soup Company \n",
+ "3 56b352e6-102b-4eff-8296-7e1fb2459bab Soup Unlimited \n",
+ "\n",
+ " inspection_id business_location.lon business_location.lat \n",
+ "0 5794_20160907 -122.481299 37.747228 \n",
+ "1 5794_20160907 -122.481299 37.747228 \n",
+ "2 24936_20160609 -122.400152 37.793199 \n",
+ "3 60354_20161123 -122.409061 37.783527 "
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# add a search query. search all soup businesses \n",
+ "wr.opensearch.search(\n",
+ " client,\n",
+ " index=\"sf_restaurants_inspections\",\n",
+ " _source=[\"inspection_id\", \"business_name\", \"business_location\"],\n",
+ " filter_path=[\"hits.hits._id\",\"hits.hits._source\"],\n",
+ " search_body={\n",
+ " \"query\": {\n",
+ " \"match\": {\n",
+ " \"business_name\": \"soup\"\n",
+ " }\n",
+ " }\n",
+ " }\n",
+ " )"
+ ]
+ },
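+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For result sets larger than a single search window, a scroll-style read may help. A minimal sketch (an assumption, not from the original tutorial): `search` is expected to expose an `is_scroll` flag that switches to the scroll API:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# hedged sketch: assumes search() accepts an is_scroll flag for large result sets\n",
+ "wr.opensearch.search(\n",
+ "    client,\n",
+ "    index=\"nyc_restaurants_inspections_sample\",\n",
+ "    is_scroll=True,\n",
+ "    _source=[\"DBA\", \"BORO\", \"CUISINE DESCRIPTION\"]\n",
+ ")"
+ ]
+ },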
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.1 Search by SQL"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " _index | \n",
+ " _type | \n",
+ " _id | \n",
+ " _score | \n",
+ " business_name | \n",
+ " inspection_score | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " sf_restaurants_inspections_dedup | \n",
+ " _doc | \n",
+ " 5794_20160907 | \n",
+ " None | \n",
+ " Soup-or-Salad | \n",
+ " 96 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " sf_restaurants_inspections_dedup | \n",
+ " _doc | \n",
+ " 60354_20161123 | \n",
+ " None | \n",
+ " Soup Unlimited | \n",
+ " 95 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " sf_restaurants_inspections_dedup | \n",
+ " _doc | \n",
+ " 24936_20160609 | \n",
+ " None | \n",
+ " San Francisco Soup Company | \n",
+ " 77 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " _index _type _id _score \\\n",
+ "0 sf_restaurants_inspections_dedup _doc 5794_20160907 None \n",
+ "1 sf_restaurants_inspections_dedup _doc 60354_20161123 None \n",
+ "2 sf_restaurants_inspections_dedup _doc 24936_20160609 None \n",
+ "\n",
+ " business_name inspection_score \n",
+ "0 Soup-or-Salad 96 \n",
+ "1 Soup Unlimited 95 \n",
+ "2 San Francisco Soup Company 77 "
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "wr.opensearch.search_by_sql(\n",
+ " client,\n",
+ " sql_query=\"\"\"SELECT business_name, inspection_score \n",
+ " FROM sf_restaurants_inspections_dedup\n",
+ " WHERE business_name LIKE '%soup%'\n",
+ " ORDER BY inspection_score DESC LIMIT 5\"\"\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4. Delete Indices"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "jupyter": {
+ "outputs_hidden": false
+ },
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'acknowledged': True}"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "wr.opensearch.delete_index(\n",
+ " client=client,\n",
+ " index=\"sf_restaurants_inspections\"\n",
+ ")"
+ ]
+ },
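+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The walkthrough above created two more indices along the way; here is an optional cleanup sketch reusing the same `delete_index` call:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# optional cleanup of the remaining indices created in section 2\n",
+ "for idx in [\"sf_restaurants_inspections_dedup\", \"nyc_restaurants_inspections_sample\"]:\n",
+ "    wr.opensearch.delete_index(client=client, index=idx)"
+ ]
+ },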
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 5. Bonus - Prepare data and index from DataFrame"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For this exercise we'll use [DOHMH New York City Restaurant Inspection Results dataset](https://data.cityofnewyork.us/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/43nn-pn8j)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv('https://data.cityofnewyork.us/api/views/43nn-pn8j/rows.csv?accessType=DOWNLOAD')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Prepare the data for indexing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# fields names underscore casing \n",
+ "df.columns = [col.lower().replace(' ', '_') for col in df.columns]\n",
+ "\n",
+ "# convert lon/lat to OpenSearch geo_point\n",
+ "df['business_location'] = \"POINT (\" + df.longitude.fillna('0').astype(str) + \" \" + df.latitude.fillna('0').astype(str) + \")\""
+ ]
+ },
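+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A quick sanity check (not in the original flow) that the generated WKT strings line up with the raw coordinates:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# spot-check the generated WKT points against the source lon/lat columns\n",
+ "df[[\"latitude\", \"longitude\", \"business_location\"]].head(3)"
+ ]
+ },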
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Create index with mapping"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'acknowledged': True,\n",
+ " 'shards_acknowledged': True,\n",
+ " 'index': 'nyc_restaurants_inspections'}"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# delete index if exists\n",
+ "wr.opensearch.delete_index(\n",
+ " client=client,\n",
+ " index=\"nyc_restaurants\"\n",
+ " \n",
+ ")\n",
+ "\n",
+ "# use dynamic_template to map date fields\n",
+ "# define business_location as geo_point\n",
+ "wr.opensearch.create_index(\n",
+ " client=client,\n",
+ " index=\"nyc_restaurants_inspections\",\n",
+ " mappings={\n",
+ " \"dynamic_templates\" : [\n",
+ " {\n",
+ " \"dates\" : {\n",
+ " \"match\" : \"*date\",\n",
+ " \"mapping\" : {\n",
+ " \"type\" : \"date\",\n",
+ " \"format\" : 'MM/dd/yyyy'\n",
+ " }\n",
+ " }\n",
+ " }\n",
+ " ],\n",
+ " \"properties\": {\n",
+ " \"business_location\": {\n",
+ " \"type\": \"geo_point\"\n",
+ " }\n",
+ " }\n",
+ " } \n",
+ ")"
+ ]
+ },
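+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To confirm the mapping was applied, the underlying opensearch-py client can be queried directly (a sketch, assuming `client` is the raw opensearch-py `OpenSearch` instance returned by `wr.opensearch.connect`):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# inspect the applied mapping via the raw opensearch-py client\n",
+ "client.indices.get_mapping(index=\"nyc_restaurants_inspections\")"
+ ]
+ },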
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "### Index dataframe"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Indexing: 100% (382655/382655)|##########################|Elapsed Time: 0:04:15"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "{'success': 382655, 'errors': []}"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "wr.opensearch.index_df(\n",
+ " client,\n",
+ " df=df,\n",
+ " index=\"nyc_restaurants_inspections\",\n",
+ " id_keys=[\"camis\"],\n",
+ " bulk_size=1000\n",
+ ")"
+ ]
+ },
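+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Newly indexed documents only become searchable after the next index refresh. To query right away, a refresh can be forced through the raw client (a sketch using opensearch-py's `indices.refresh`):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# force a refresh so the just-indexed documents are immediately searchable\n",
+ "client.indices.refresh(index=\"nyc_restaurants_inspections\")"
+ ]
+ },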
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Execute geo query\n",
+ "#### Sort restaurants by distance from Times-Square"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " camis | \n",
+ " dba | \n",
+ " boro | \n",
+ " building | \n",
+ " street | \n",
+ " zipcode | \n",
+ " phone | \n",
+ " cuisine_description | \n",
+ " inspection_date | \n",
+ " action | \n",
+ " ... | \n",
+ " inspection_type | \n",
+ " latitude | \n",
+ " longitude | \n",
+ " community_board | \n",
+ " council_district | \n",
+ " census_tract | \n",
+ " bin | \n",
+ " bbl | \n",
+ " nta | \n",
+ " business_location | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 41551304 | \n",
+ " THE COUNTER | \n",
+ " Manhattan | \n",
+ " 7 | \n",
+ " TIMES SQUARE | \n",
+ " 10036.0 | \n",
+ " 2129976801 | \n",
+ " American | \n",
+ " 12/22/2016 | \n",
+ " Violations were cited in the following area(s). | \n",
+ " ... | \n",
+ " Cycle Inspection / Initial Inspection | \n",
+ " 40.755908 | \n",
+ " -73.986681 | \n",
+ " 105.0 | \n",
+ " 3.0 | \n",
+ " 11300.0 | \n",
+ " 1086069.0 | \n",
+ " 1.009940e+09 | \n",
+ " MN17 | \n",
+ " POINT (-73.986680953809 40.755907817312) | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 50055665 | \n",
+ " ANN INC CAFE | \n",
+ " Manhattan | \n",
+ " 7 | \n",
+ " TIMES SQUARE | \n",
+ " 10036.0 | \n",
+ " 2125413287 | \n",
+ " American | \n",
+ " 12/11/2019 | \n",
+ " Violations were cited in the following area(s). | \n",
+ " ... | \n",
+ " Cycle Inspection / Initial Inspection | \n",
+ " 40.755908 | \n",
+ " -73.986681 | \n",
+ " 105.0 | \n",
+ " 3.0 | \n",
+ " 11300.0 | \n",
+ " 1086069.0 | \n",
+ " 1.009940e+09 | \n",
+ " MN17 | \n",
+ " POINT (-73.986680953809 40.755907817312) | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 50049552 | \n",
+ " ERNST AND YOUNG | \n",
+ " Manhattan | \n",
+ " 5 | \n",
+ " TIMES SQ | \n",
+ " 10036.0 | \n",
+ " 2127739994 | \n",
+ " Coffee/Tea | \n",
+ " 11/30/2018 | \n",
+ " Violations were cited in the following area(s). | \n",
+ " ... | \n",
+ " Cycle Inspection / Initial Inspection | \n",
+ " 40.755702 | \n",
+ " -73.987208 | \n",
+ " 105.0 | \n",
+ " 3.0 | \n",
+ " 11300.0 | \n",
+ " 1024656.0 | \n",
+ " 1.010130e+09 | \n",
+ " MN17 | \n",
+ " POINT (-73.987207980138 40.755702020307) | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 50014078 | \n",
+ " RED LOBSTER | \n",
+ " Manhattan | \n",
+ " 5 | \n",
+ " TIMES SQ | \n",
+ " 10036.0 | \n",
+ " 2127306706 | \n",
+ " Seafood | \n",
+ " 10/03/2017 | \n",
+ " Violations were cited in the following area(s). | \n",
+ " ... | \n",
+ " Cycle Inspection / Initial Inspection | \n",
+ " 40.755702 | \n",
+ " -73.987208 | \n",
+ " 105.0 | \n",
+ " 3.0 | \n",
+ " 11300.0 | \n",
+ " 1024656.0 | \n",
+ " 1.010130e+09 | \n",
+ " MN17 | \n",
+ " POINT (-73.987207980138 40.755702020307) | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 50015171 | \n",
+ " NEW AMSTERDAM THEATER | \n",
+ " Manhattan | \n",
+ " 214 | \n",
+ " WEST 42 STREET | \n",
+ " 10036.0 | \n",
+ " 2125825472 | \n",
+ " American | \n",
+ " 06/26/2018 | \n",
+ " Violations were cited in the following area(s). | \n",
+ " ... | \n",
+ " Cycle Inspection / Re-inspection | \n",
+ " 40.756317 | \n",
+ " -73.987652 | \n",
+ " 105.0 | \n",
+ " 3.0 | \n",
+ " 11300.0 | \n",
+ " 1024660.0 | \n",
+ " 1.010130e+09 | \n",
+ " MN17 | \n",
+ " POINT (-73.987651832547 40.756316895053) | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 95 | \n",
+ " 41552060 | \n",
+ " PROSKAUER ROSE | \n",
+ " Manhattan | \n",
+ " 11 | \n",
+ " TIMES SQUARE | \n",
+ " 10036.0 | \n",
+ " 2129695493 | \n",
+ " American | \n",
+ " 08/11/2017 | \n",
+ " Violations were cited in the following area(s). | \n",
+ " ... | \n",
+ " Administrative Miscellaneous / Initial Inspection | \n",
+ " 40.756891 | \n",
+ " -73.990023 | \n",
+ " 105.0 | \n",
+ " 3.0 | \n",
+ " 11300.0 | \n",
+ " 1087978.0 | \n",
+ " 1.010138e+09 | \n",
+ " MN17 | \n",
+ " POINT (-73.990023200823 40.756890780426) | \n",
+ "
\n",
+ " \n",
+ " | 96 | \n",
+ " 41242148 | \n",
+ " GABBY O'HARA'S | \n",
+ " Manhattan | \n",
+ " 123 | \n",
+ " WEST 39 STREET | \n",
+ " 10018.0 | \n",
+ " 2122788984 | \n",
+ " Irish | \n",
+ " 07/30/2019 | \n",
+ " Violations were cited in the following area(s). | \n",
+ " ... | \n",
+ " Cycle Inspection / Re-inspection | \n",
+ " 40.753405 | \n",
+ " -73.986602 | \n",
+ " 105.0 | \n",
+ " 4.0 | \n",
+ " 11300.0 | \n",
+ " 1080611.0 | \n",
+ " 1.008150e+09 | \n",
+ " MN17 | \n",
+ " POINT (-73.986602050292 40.753404587174) | \n",
+ "
\n",
+ " \n",
+ " | 97 | \n",
+ " 50095860 | \n",
+ " THE TIMES EATERY | \n",
+ " Manhattan | \n",
+ " 680 | \n",
+ " 8 AVENUE | \n",
+ " 10036.0 | \n",
+ " 6463867787 | \n",
+ " American | \n",
+ " 02/28/2020 | \n",
+ " Violations were cited in the following area(s). | \n",
+ " ... | \n",
+ " Pre-permit (Operational) / Initial Inspection | \n",
+ " 40.757991 | \n",
+ " -73.989218 | \n",
+ " 105.0 | \n",
+ " 3.0 | \n",
+ " 11900.0 | \n",
+ " 1024703.0 | \n",
+ " 1.010150e+09 | \n",
+ " MN17 | \n",
+ " POINT (-73.989218092096 40.757991356019) | \n",
+ "
\n",
+ " \n",
+ " | 98 | \n",
+ " 50072861 | \n",
+ " ITSU | \n",
+ " Manhattan | \n",
+ " 530 | \n",
+ " 7 AVENUE | \n",
+ " 10018.0 | \n",
+ " 9176393645 | \n",
+ " Asian/Asian Fusion | \n",
+ " 09/10/2018 | \n",
+ " Violations were cited in the following area(s). | \n",
+ " ... | \n",
+ " Pre-permit (Operational) / Initial Inspection | \n",
+ " 40.753844 | \n",
+ " -73.988551 | \n",
+ " 105.0 | \n",
+ " 3.0 | \n",
+ " 11300.0 | \n",
+ " 1014485.0 | \n",
+ " 1.007880e+09 | \n",
+ " MN17 | \n",
+ " POINT (-73.988551029682 40.753843959794) | \n",
+ "
\n",
+ " \n",
+ " | 99 | \n",
+ " 50068109 | \n",
+ " LUKE'S LOBSTER | \n",
+ " Manhattan | \n",
+ " 1407 | \n",
+ " BROADWAY | \n",
+ " 10018.0 | \n",
+ " 9174759192 | \n",
+ " Seafood | \n",
+ " 09/06/2017 | \n",
+ " Violations were cited in the following area(s). | \n",
+ " ... | \n",
+ " Pre-permit (Operational) / Initial Inspection | \n",
+ " 40.753432 | \n",
+ " -73.987151 | \n",
+ " 105.0 | \n",
+ " 3.0 | \n",
+ " 11300.0 | \n",
+ " 1015265.0 | \n",
+ " 1.008140e+09 | \n",
+ " MN17 | \n",
+ " POINT (-73.98715066791 40.753432097521) | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
100 rows × 27 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " camis dba boro building street \\\n",
+ "0 41551304 THE COUNTER Manhattan 7 TIMES SQUARE \n",
+ "1 50055665 ANN INC CAFE Manhattan 7 TIMES SQUARE \n",
+ "2 50049552 ERNST AND YOUNG Manhattan 5 TIMES SQ \n",
+ "3 50014078 RED LOBSTER Manhattan 5 TIMES SQ \n",
+ "4 50015171 NEW AMSTERDAM THEATER Manhattan 214 WEST 42 STREET \n",
+ ".. ... ... ... ... ... \n",
+ "95 41552060 PROSKAUER ROSE Manhattan 11 TIMES SQUARE \n",
+ "96 41242148 GABBY O'HARA'S Manhattan 123 WEST 39 STREET \n",
+ "97 50095860 THE TIMES EATERY Manhattan 680 8 AVENUE \n",
+ "98 50072861 ITSU Manhattan 530 7 AVENUE \n",
+ "99 50068109 LUKE'S LOBSTER Manhattan 1407 BROADWAY \n",
+ "\n",
+ " zipcode phone cuisine_description inspection_date \\\n",
+ "0 10036.0 2129976801 American 12/22/2016 \n",
+ "1 10036.0 2125413287 American 12/11/2019 \n",
+ "2 10036.0 2127739994 Coffee/Tea 11/30/2018 \n",
+ "3 10036.0 2127306706 Seafood 10/03/2017 \n",
+ "4 10036.0 2125825472 American 06/26/2018 \n",
+ ".. ... ... ... ... \n",
+ "95 10036.0 2129695493 American 08/11/2017 \n",
+ "96 10018.0 2122788984 Irish 07/30/2019 \n",
+ "97 10036.0 6463867787 American 02/28/2020 \n",
+ "98 10018.0 9176393645 Asian/Asian Fusion 09/10/2018 \n",
+ "99 10018.0 9174759192 Seafood 09/06/2017 \n",
+ "\n",
+ " action ... \\\n",
+ "0 Violations were cited in the following area(s). ... \n",
+ "1 Violations were cited in the following area(s). ... \n",
+ "2 Violations were cited in the following area(s). ... \n",
+ "3 Violations were cited in the following area(s). ... \n",
+ "4 Violations were cited in the following area(s). ... \n",
+ ".. ... ... \n",
+ "95 Violations were cited in the following area(s). ... \n",
+ "96 Violations were cited in the following area(s). ... \n",
+ "97 Violations were cited in the following area(s). ... \n",
+ "98 Violations were cited in the following area(s). ... \n",
+ "99 Violations were cited in the following area(s). ... \n",
+ "\n",
+ " inspection_type latitude longitude \\\n",
+ "0 Cycle Inspection / Initial Inspection 40.755908 -73.986681 \n",
+ "1 Cycle Inspection / Initial Inspection 40.755908 -73.986681 \n",
+ "2 Cycle Inspection / Initial Inspection 40.755702 -73.987208 \n",
+ "3 Cycle Inspection / Initial Inspection 40.755702 -73.987208 \n",
+ "4 Cycle Inspection / Re-inspection 40.756317 -73.987652 \n",
+ ".. ... ... ... \n",
+ "95 Administrative Miscellaneous / Initial Inspection 40.756891 -73.990023 \n",
+ "96 Cycle Inspection / Re-inspection 40.753405 -73.986602 \n",
+ "97 Pre-permit (Operational) / Initial Inspection 40.757991 -73.989218 \n",
+ "98 Pre-permit (Operational) / Initial Inspection 40.753844 -73.988551 \n",
+ "99 Pre-permit (Operational) / Initial Inspection 40.753432 -73.987151 \n",
+ "\n",
+ " community_board council_district census_tract bin bbl \\\n",
+ "0 105.0 3.0 11300.0 1086069.0 1.009940e+09 \n",
+ "1 105.0 3.0 11300.0 1086069.0 1.009940e+09 \n",
+ "2 105.0 3.0 11300.0 1024656.0 1.010130e+09 \n",
+ "3 105.0 3.0 11300.0 1024656.0 1.010130e+09 \n",
+ "4 105.0 3.0 11300.0 1024660.0 1.010130e+09 \n",
+ ".. ... ... ... ... ... \n",
+ "95 105.0 3.0 11300.0 1087978.0 1.010138e+09 \n",
+ "96 105.0 4.0 11300.0 1080611.0 1.008150e+09 \n",
+ "97 105.0 3.0 11900.0 1024703.0 1.010150e+09 \n",
+ "98 105.0 3.0 11300.0 1014485.0 1.007880e+09 \n",
+ "99 105.0 3.0 11300.0 1015265.0 1.008140e+09 \n",
+ "\n",
+ " nta business_location \n",
+ "0 MN17 POINT (-73.986680953809 40.755907817312) \n",
+ "1 MN17 POINT (-73.986680953809 40.755907817312) \n",
+ "2 MN17 POINT (-73.987207980138 40.755702020307) \n",
+ "3 MN17 POINT (-73.987207980138 40.755702020307) \n",
+ "4 MN17 POINT (-73.987651832547 40.756316895053) \n",
+ ".. ... ... \n",
+ "95 MN17 POINT (-73.990023200823 40.756890780426) \n",
+ "96 MN17 POINT (-73.986602050292 40.753404587174) \n",
+ "97 MN17 POINT (-73.989218092096 40.757991356019) \n",
+ "98 MN17 POINT (-73.988551029682 40.753843959794) \n",
+ "99 MN17 POINT (-73.98715066791 40.753432097521) \n",
+ "\n",
+ "[100 rows x 27 columns]"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "wr.opensearch.search(\n",
+ " client,\n",
+ " index=\"nyc_restaurants_inspections\",\n",
+ " filter_path=[\"hits.hits._source\"],\n",
+ " size=100,\n",
+ " search_body={\n",
+ " \"query\": {\n",
+ " \"match_all\": {}\n",
+ " },\n",
+ " \"sort\": [\n",
+ " {\n",
+ " \"_geo_distance\": {\n",
+ " \"business_location\": { # Times-Square - https://geojson.io/#map=16/40.7563/-73.9862\n",
+ " \"lat\": 40.75613228383523,\n",
+ " \"lon\": -73.9865791797638\n",
+ " },\n",
+ " \"order\": \"asc\"\n",
+ " }\n",
+ " }\n",
+ " ]\n",
+ " }\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}