From 0a83644b8c92a15775e47b15584db61bfe97c289 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Tue, 31 Aug 2021 16:09:44 -0400 Subject: [PATCH 01/41] [skip ci] elasticsearch support init: structure and skeleton code --- awswrangler/elasticsearch/__init__.py | 6 + awswrangler/elasticsearch/_read.py | 3 + awswrangler/elasticsearch/_utils.py | 41 ++++ awswrangler/elasticsearch/_write.py | 285 ++++++++++++++++++++++++++ 4 files changed, 335 insertions(+) create mode 100644 awswrangler/elasticsearch/__init__.py create mode 100644 awswrangler/elasticsearch/_read.py create mode 100644 awswrangler/elasticsearch/_utils.py create mode 100644 awswrangler/elasticsearch/_write.py diff --git a/awswrangler/elasticsearch/__init__.py b/awswrangler/elasticsearch/__init__.py new file mode 100644 index 000000000..bca7b8a7f --- /dev/null +++ b/awswrangler/elasticsearch/__init__.py @@ -0,0 +1,6 @@ +"""Utilities Module for Amazon Elasticsearch.""" + +from awswrangler.elasticsearch._utils import connect +from awswrangler.elasticsearch._write import create_index, index_csv, index_documents, index_df, index_json + +__all__ = ["connect", "create_index", "index_csv", "index_documents", "index_df", "index_json"] diff --git a/awswrangler/elasticsearch/_read.py b/awswrangler/elasticsearch/_read.py new file mode 100644 index 000000000..bf5102ab6 --- /dev/null +++ b/awswrangler/elasticsearch/_read.py @@ -0,0 +1,3 @@ +"""Amazon Elasticsearch Read Module (PRIVATE).""" + +# TODO: create module diff --git a/awswrangler/elasticsearch/_utils.py b/awswrangler/elasticsearch/_utils.py new file mode 100644 index 000000000..51b1b4223 --- /dev/null +++ b/awswrangler/elasticsearch/_utils.py @@ -0,0 +1,41 @@ +"""Amazon Elasticsearch Utils Module (PRIVATE).""" + +from typing import Optional + +import boto3 + +from awswrangler import _utils, exceptions +from elasticsearch import Elasticsearch + + +def connect( + host: str, + boto3_session: Optional[boto3.Session] = None +) -> Elasticsearch: + """Establishes a secure connection to the specified Amazon ES domain. + + Note + ---- + We use [elasticsearch-py](https://elasticsearch-py.readthedocs.io/en/v7.13.4/), an Elasticsearch client for Python, + version 7.13.4, which is the recommended version for best compatibility Amazon ES, + since later versions may reject connections to Amazon ES clusters. + In the future will move to a new open source client under the [OpenSearch project](https://www.opensearch.org/) + You can read more here: + https://aws.amazon.com/blogs/opensource/keeping-clients-of-opensearch-and-elasticsearch-compatible-with-open-source/ + https://opensearch.org/docs/clients/index/ + + Parameters + ---------- + host : str + Amazon Elasticsearch domain, for example: my-test-domain.us-east-1.es.amazonaws.com. + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 Session will be used if boto3_session receive None. + + Returns + ------- + elasticsearch.Elasticsearch + Elasticsearch low-level client. 
+ https://elasticsearch-py.readthedocs.io/en/v7.13.4/api.html#elasticsearch + """ + + pass # connect to Amazon ES diff --git a/awswrangler/elasticsearch/_write.py b/awswrangler/elasticsearch/_write.py new file mode 100644 index 000000000..779aa1b6f --- /dev/null +++ b/awswrangler/elasticsearch/_write.py @@ -0,0 +1,285 @@ +"""Amazon Elasticsearch Write Module (PRIVATE).""" + +import json +import logging +from pathlib import Path +from typing import Any, Dict, List, Mapping, Optional, Union, Tuple, Iterable + +import boto3 +import pandas as pd + +from elasticsearch import Elasticsearch + +_logger: logging.Logger = logging.getLogger(__name__) + + +def create_index( + index: str, + doc_type: Optional[str] = None, + settings: Optional[Dict[str, Any]] = None, + mappings: Optional[Dict[str, Any]] = None, + boto3_session: Optional[boto3.Session] = None, + con: Optional[Elasticsearch] = None +) -> Dict[str, Any]: + """Creates an index. + + Parameters + ---------- + index : str + Name of the index. + doc_type : str + Name of the document type (for Elasticsearch versions 5.x and earlier). + settings : Dict[str, Any], optional + Index settings + https://opensearch.org/docs/opensearch/rest-api/create-index/#index-settings + mappings : Dict[str, Any], optional + Index mappings + https://opensearch.org/docs/opensearch/rest-api/create-index/#mappings + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 Session will be used if boto3_session receive None. + con : elasticsearch.Elasticsearch, optional + Elasticsearch client. A new connection will be established if con receive None. + + Returns + ------- + Dict[str, Any] + Elasticsearch rest api response + https://opensearch.org/docs/opensearch/rest-api/create-index/#response. + + Examples + -------- + Creating an index. + + >>> import awswrangler as wr + >>> response = wr.elasticsearch.create_index( + ... index="sample-index1", + ... mappings={ + ... "properties": { + ... "age": { "type" : "integer" } + ... } + ... }, + ... settings={ + ... "index": { + ... "number_of_shards": 2, + ... "number_of_replicas": 1 + ... } + ... } + ... ) + + """ + + +def index_json( + path: Union[str, Path], + index: str, + doc_type: Optional[str] = None, + bulk_params: Optional[Union[List[Any], Tuple[Any], Dict[Any, Any]]] = None, + boto3_session: Optional[boto3.Session] = None, + **kwargs +) -> Dict[str, Any]: + """Index all documents from JSON file to Elasticsearch index. + + The JSON file should be in a JSON-Lines text format (newline-delimited JSON) - https://jsonlines.org/. + + Parameters + ---------- + path : Union[str, Path] + Path as str or Path object to the JSON file which contains the documents. + index : str + Name of the index. + doc_type : str + Name of the document type (only for Elasticsearch versions 5.x and earlier). + bulk_params : Union[List, Tuple, Dict], optional + List of parameters to pass to bulk operation. + References: + elasticsearch >= 7.10.2: https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#url-parameters + elasticsearch < 7.10.2: https://opendistro.github.io/for-elasticsearch-docs/docs/elasticsearch/rest-api-reference/#url-parameters + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 Session will be used if boto3_session receive None. 
+ **kwargs : + KEYWORD arguments forwarded to :func:`~awswrangler.elasticsearch.index_documents` + which is used to execute the operation + + Returns + ------- + Dict[str, Any] + Response payload + https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#response. + + Examples + -------- + Writing contents of JSON file + + >>> import awswrangler as wr + >>> wr.elasticsearch.index_json( + ... path='docs.json', + ... index='sample-index1' + ... ) + """ + # Loading data from file + + pass # TODO: load data from json file + + +def index_csv( + path: Union[str, Path], + index: str, + doc_type: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, + pandas_params: Optional[Dict[str, Any]] = None +) -> Dict[str, Any]: + """Index all documents from a CSV file to Elasticsearch index. + + Parameters + ---------- + path : Union[str, Path] + Path as str or Path object to the CSV file which contains the documents. + index : str + Name of the index. + doc_type : str + Name of the document type (only for Elasticsearch versions 5.x and older). + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 Session will be used if boto3_session receive None. + pandas_params : + Dictionary of arguments forwarded to pandas.read_csv(). + e.g. pandas_kwargs={'sep': '|', 'na_values': ['null', 'none'], 'skip_blank_lines': True} + https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html + + Returns + ------- + Dict[str, Any] + Response payload + https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#response. + + Examples + -------- + Writing contents of CSV file + + >>> import awswrangler as wr + >>> wr.elasticsearch.index_csv( + ... path='docs.csv', + ... index='sample-index1' + ... ) + + Writing contents of CSV file using pandas_kwargs + + >>> import awswrangler as wr + >>> wr.elasticsearch.index_csv( + ... path='docs.csv', + ... index='sample-index1', + ... pandas_params={'sep': '|', 'na_values': ['null', 'none'], 'skip_blank_lines': True} + ... ) + """ + pass # TODO: load data from csv file + + +def index_df( + df: pd.DataFrame, + index: str, + doc_type: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, +) -> Dict[str, Any]: + """Index all documents from a DataFrame to Elasticsearch index. + + Parameters + ---------- + df : pd.DataFrame + Pandas DataFrame https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html + index : str + Name of the index. + doc_type : str + Name of the document type (only for Elasticsearch versions 5.x and older). + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 Session will be used if boto3_session receive None. + + Returns + ------- + Dict[str, Any] + Response payload + https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#response. + + Examples + -------- + Writing rows of DataFrame + + >>> import awswrangler as wr + >>> import pandas as pd + >>> wr.elasticsearch.index_df( + ... df=pd.DataFrame([{'_id': '1'}, {'_id': '2'}, {'_id': '3'}]), + ... index='sample-index1' + ... 
) + """ + pass # TODO: load data from dataframe + + +def index_documents( + documents: Union[Iterable[Dict[str, Any]], Iterable[Mapping[str, Any]]], + index: str, + doc_type: Optional[str] = None, + boto3_session: Optional[boto3.Session] = None, + con: Optional[Elasticsearch] = None, + ignore_status: Optional[Union[List[Any], Tuple[Any]]] = None, + chunk_size: Optional[int] = 500, + max_chunk_bytes: Optional[int] = 100 * 1024 * 1024, + max_retries: Optional[int] = 0, + initial_backoff: Optional[int] = 2, + max_backoff: Optional[int] = 600, + **kwargs + +) -> Dict[str, Any]: + """Index all documents to Elasticsearch index. + + Note + ---- + Some of the args are referenced from elasticsearch-py client library (bulk helpers) + https://elasticsearch-py.readthedocs.io/en/v7.13.4/helpers.html#elasticsearch.helpers.bulk + https://elasticsearch-py.readthedocs.io/en/v7.13.4/helpers.html#elasticsearch.helpers.streaming_bulk + + Parameters + ---------- + documents : Union[Iterable[Dict[str, Any]], Iterable[Mapping[str, Any]]] + List which contains the documents that will be inserted. + index : str + Name of the index. + doc_type : str + Name of the document type (only for Elasticsearch versions 5.x and older). + boto3_session : boto3.Session(), optional + Boto3 Session. The default boto3 Session will be used if boto3_session receive None. + con : elasticsearch.Elasticsearch, optional + Elasticsearch client. A new connection will be established if con receive None. + ignore_status: Union[List[Any], Tuple[Any]], optional + list of HTTP status codes that you want to ignore (not raising an exception) + chunk_size : int, optional + number of docs in one chunk sent to es (default: 500) + max_chunk_bytes: int, optional + the maximum size of the request in bytes (default: 100MB) + max_retries : int, optional + maximum number of times a document will be retried when + ``429`` is received, set to 0 (default) for no retries on ``429`` (default: 0) + initial_backoff : int, optional + number of seconds we should wait before the first retry. + Any subsequent retries will be powers of ``initial_backoff*2**retry_number`` (default: 2) + max_backoff: int, optional + maximum number of seconds a retry will wait (default: 600) + **kwargs : + KEYWORD arguments forwarded to bulk operation + elasticsearch >= 7.10.2: https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#url-parameters + elasticsearch < 7.10.2: https://opendistro.github.io/for-elasticsearch-docs/docs/elasticsearch/rest-api-reference/#url-parameters + + Returns + ------- + Dict[str, Any] + Response payload + https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#response. + + Examples + -------- + Writing documents + + >>> import awswrangler as wr + >>> wr.elasticsearch.index_documents( + ... documents=[{'_id': '1', 'value': 'foo'}, {'_id': '2', 'value': 'bar'}], + ... index='sample-index1' + ... 
) + """ + pass # TODO: load documents From 947119f003464cffdf117c4068060a0af657407c Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Fri, 10 Sep 2021 11:44:05 -0400 Subject: [PATCH 02/41] [skip ci] rename elasticsearch->opensearch --- awswrangler/elasticsearch/_read.py | 3 -- .../{elasticsearch => opensearch}/__init__.py | 2 +- awswrangler/opensearch/_read.py | 3 ++ .../{elasticsearch => opensearch}/_utils.py | 12 ++++---- .../{elasticsearch => opensearch}/_write.py | 30 +++++++++---------- 5 files changed, 25 insertions(+), 25 deletions(-) delete mode 100644 awswrangler/elasticsearch/_read.py rename awswrangler/{elasticsearch => opensearch}/__init__.py (84%) create mode 100644 awswrangler/opensearch/_read.py rename awswrangler/{elasticsearch => opensearch}/_utils.py (75%) rename awswrangler/{elasticsearch => opensearch}/_write.py (90%) diff --git a/awswrangler/elasticsearch/_read.py b/awswrangler/elasticsearch/_read.py deleted file mode 100644 index bf5102ab6..000000000 --- a/awswrangler/elasticsearch/_read.py +++ /dev/null @@ -1,3 +0,0 @@ -"""Amazon Elasticsearch Read Module (PRIVATE).""" - -# TODO: create module diff --git a/awswrangler/elasticsearch/__init__.py b/awswrangler/opensearch/__init__.py similarity index 84% rename from awswrangler/elasticsearch/__init__.py rename to awswrangler/opensearch/__init__.py index bca7b8a7f..99cf0e6a4 100644 --- a/awswrangler/elasticsearch/__init__.py +++ b/awswrangler/opensearch/__init__.py @@ -1,4 +1,4 @@ -"""Utilities Module for Amazon Elasticsearch.""" +"""Utilities Module for Amazon OpenSearch.""" from awswrangler.elasticsearch._utils import connect from awswrangler.elasticsearch._write import create_index, index_csv, index_documents, index_df, index_json diff --git a/awswrangler/opensearch/_read.py b/awswrangler/opensearch/_read.py new file mode 100644 index 000000000..be813c2bb --- /dev/null +++ b/awswrangler/opensearch/_read.py @@ -0,0 +1,3 @@ +"""Amazon OpenSearch Read Module (PRIVATE).""" + +# TODO: create module diff --git a/awswrangler/elasticsearch/_utils.py b/awswrangler/opensearch/_utils.py similarity index 75% rename from awswrangler/elasticsearch/_utils.py rename to awswrangler/opensearch/_utils.py index 51b1b4223..f2464bc80 100644 --- a/awswrangler/elasticsearch/_utils.py +++ b/awswrangler/opensearch/_utils.py @@ -1,4 +1,4 @@ -"""Amazon Elasticsearch Utils Module (PRIVATE).""" +"""Amazon OpenSearch Utils Module (PRIVATE).""" from typing import Optional @@ -12,13 +12,13 @@ def connect( host: str, boto3_session: Optional[boto3.Session] = None ) -> Elasticsearch: - """Establishes a secure connection to the specified Amazon ES domain. + """Establishes a secure connection to the specified Amazon OpenSearch domain. Note ---- We use [elasticsearch-py](https://elasticsearch-py.readthedocs.io/en/v7.13.4/), an Elasticsearch client for Python, - version 7.13.4, which is the recommended version for best compatibility Amazon ES, - since later versions may reject connections to Amazon ES clusters. + version 7.13.4, which is the recommended version for best compatibility Amazon OpenSearch, + since later versions may reject connections to Amazon OpenSearch clusters. 
In the future will move to a new open source client under the [OpenSearch project](https://www.opensearch.org/) You can read more here: https://aws.amazon.com/blogs/opensource/keeping-clients-of-opensearch-and-elasticsearch-compatible-with-open-source/ @@ -27,7 +27,7 @@ def connect( Parameters ---------- host : str - Amazon Elasticsearch domain, for example: my-test-domain.us-east-1.es.amazonaws.com. + Amazon OpenSearch domain, for example: my-test-domain.us-east-1.es.amazonaws.com. boto3_session : boto3.Session(), optional Boto3 Session. The default boto3 Session will be used if boto3_session receive None. @@ -38,4 +38,4 @@ def connect( https://elasticsearch-py.readthedocs.io/en/v7.13.4/api.html#elasticsearch """ - pass # connect to Amazon ES + pass # connect to Amazon OpenSearch diff --git a/awswrangler/elasticsearch/_write.py b/awswrangler/opensearch/_write.py similarity index 90% rename from awswrangler/elasticsearch/_write.py rename to awswrangler/opensearch/_write.py index 779aa1b6f..0ea0ac537 100644 --- a/awswrangler/elasticsearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -1,4 +1,4 @@ -"""Amazon Elasticsearch Write Module (PRIVATE).""" +"""Amazon OpenSearch Write Module (PRIVATE).""" import json import logging @@ -43,7 +43,7 @@ def create_index( Returns ------- Dict[str, Any] - Elasticsearch rest api response + OpenSearch rest api response https://opensearch.org/docs/opensearch/rest-api/create-index/#response. Examples @@ -51,7 +51,7 @@ def create_index( Creating an index. >>> import awswrangler as wr - >>> response = wr.elasticsearch.create_index( + >>> response = wr.opensearch.create_index( ... index="sample-index1", ... mappings={ ... "properties": { @@ -77,7 +77,7 @@ def index_json( boto3_session: Optional[boto3.Session] = None, **kwargs ) -> Dict[str, Any]: - """Index all documents from JSON file to Elasticsearch index. + """Index all documents from JSON file to OpenSearch index. The JSON file should be in a JSON-Lines text format (newline-delimited JSON) - https://jsonlines.org/. @@ -92,12 +92,12 @@ def index_json( bulk_params : Union[List, Tuple, Dict], optional List of parameters to pass to bulk operation. References: - elasticsearch >= 7.10.2: https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#url-parameters + elasticsearch >= 7.10.2 / opensearch: https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#url-parameters elasticsearch < 7.10.2: https://opendistro.github.io/for-elasticsearch-docs/docs/elasticsearch/rest-api-reference/#url-parameters boto3_session : boto3.Session(), optional Boto3 Session. The default boto3 Session will be used if boto3_session receive None. **kwargs : - KEYWORD arguments forwarded to :func:`~awswrangler.elasticsearch.index_documents` + KEYWORD arguments forwarded to :func:`~awswrangler.opensearch.index_documents` which is used to execute the operation Returns @@ -111,7 +111,7 @@ def index_json( Writing contents of JSON file >>> import awswrangler as wr - >>> wr.elasticsearch.index_json( + >>> wr.opensearch.index_json( ... path='docs.json', ... index='sample-index1' ... ) @@ -128,7 +128,7 @@ def index_csv( boto3_session: Optional[boto3.Session] = None, pandas_params: Optional[Dict[str, Any]] = None ) -> Dict[str, Any]: - """Index all documents from a CSV file to Elasticsearch index. + """Index all documents from a CSV file to OpenSearch index. 
Parameters ---------- @@ -156,7 +156,7 @@ def index_csv( Writing contents of CSV file >>> import awswrangler as wr - >>> wr.elasticsearch.index_csv( + >>> wr.opensearch.index_csv( ... path='docs.csv', ... index='sample-index1' ... ) @@ -164,7 +164,7 @@ def index_csv( Writing contents of CSV file using pandas_kwargs >>> import awswrangler as wr - >>> wr.elasticsearch.index_csv( + >>> wr.opensearch.index_csv( ... path='docs.csv', ... index='sample-index1', ... pandas_params={'sep': '|', 'na_values': ['null', 'none'], 'skip_blank_lines': True} @@ -179,7 +179,7 @@ def index_df( doc_type: Optional[str] = None, boto3_session: Optional[boto3.Session] = None, ) -> Dict[str, Any]: - """Index all documents from a DataFrame to Elasticsearch index. + """Index all documents from a DataFrame to OpenSearch index. Parameters ---------- @@ -204,7 +204,7 @@ def index_df( >>> import awswrangler as wr >>> import pandas as pd - >>> wr.elasticsearch.index_df( + >>> wr.opensearch.index_df( ... df=pd.DataFrame([{'_id': '1'}, {'_id': '2'}, {'_id': '3'}]), ... index='sample-index1' ... ) @@ -227,7 +227,7 @@ def index_documents( **kwargs ) -> Dict[str, Any]: - """Index all documents to Elasticsearch index. + """Index all documents to OpenSearch index. Note ---- @@ -263,7 +263,7 @@ def index_documents( maximum number of seconds a retry will wait (default: 600) **kwargs : KEYWORD arguments forwarded to bulk operation - elasticsearch >= 7.10.2: https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#url-parameters + elasticsearch >= 7.10.2 / opensearch: https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#url-parameters elasticsearch < 7.10.2: https://opendistro.github.io/for-elasticsearch-docs/docs/elasticsearch/rest-api-reference/#url-parameters Returns @@ -277,7 +277,7 @@ def index_documents( Writing documents >>> import awswrangler as wr - >>> wr.elasticsearch.index_documents( + >>> wr.opensearch.index_documents( ... documents=[{'_id': '1', 'value': 'foo'}, {'_id': '2', 'value': 'bar'}], ... index='sample-index1' ... 
) From 4534d7a5f22934cadd59476498d369df232be6e6 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Mon, 13 Sep 2021 23:08:22 -0400 Subject: [PATCH 03/41] [skip ci] merge Assaf and Murali forks --- awswrangler/__init__.py | 2 + awswrangler/opensearch/__init__.py | 15 ++- awswrangler/opensearch/_read.py | 117 ++++++++++++++++++++++- awswrangler/opensearch/_utils.py | 74 ++++++++++++++- awswrangler/opensearch/_write.py | 147 ++++++++++++++++++++++------- tests/test_opensearch.py | 71 ++++++++++++++ 6 files changed, 386 insertions(+), 40 deletions(-) create mode 100644 tests/test_opensearch.py diff --git a/awswrangler/__init__.py b/awswrangler/__init__.py index ee068e4f6..c87d36823 100644 --- a/awswrangler/__init__.py +++ b/awswrangler/__init__.py @@ -17,6 +17,7 @@ emr, exceptions, mysql, + opensearch, postgresql, quicksight, redshift, @@ -38,6 +39,7 @@ "data_api", "dynamodb", "exceptions", + "opensearch", "quicksight", "s3", "sts", diff --git a/awswrangler/opensearch/__init__.py b/awswrangler/opensearch/__init__.py index 99cf0e6a4..cd7184e79 100644 --- a/awswrangler/opensearch/__init__.py +++ b/awswrangler/opensearch/__init__.py @@ -1,6 +1,15 @@ """Utilities Module for Amazon OpenSearch.""" -from awswrangler.elasticsearch._utils import connect -from awswrangler.elasticsearch._write import create_index, index_csv, index_documents, index_df, index_json +from awswrangler.opensearch._utils import connect +from awswrangler.opensearch._write import create_index, index_csv, index_documents, index_df, index_json +from awswrangler.opensearch._read import search, search_by_sql -__all__ = ["connect", "create_index", "index_csv", "index_documents", "index_df", "index_json"] +__all__ = ["connect", + "create_index", + "index_csv", + "index_documents", + "index_df", + "index_json", + "search", + "search_by_sql" + ] diff --git a/awswrangler/opensearch/_read.py b/awswrangler/opensearch/_read.py index be813c2bb..e1e735828 100644 --- a/awswrangler/opensearch/_read.py +++ b/awswrangler/opensearch/_read.py @@ -1,3 +1,118 @@ """Amazon OpenSearch Read Module (PRIVATE).""" -# TODO: create module +from pandasticsearch import Select, DataFrame +from typing import Any, Dict, Optional +from elasticsearch import Elasticsearch +from elasticsearch.helpers import scan + + +def _scan( + client: Elasticsearch, + index: Optional[str] = '_all', + search_body: Optional[Dict[str, Any]] = None, + doc_type: Optional[str] = None, + scroll: Optional[str] = '10m', + **kwargs +): + # TODO: write logic based on https://elasticsearch-py.readthedocs.io/en/master/helpers.html#scan + pass + + +def search( + client: Elasticsearch, + index: Optional[str] = '_all', + search_body: Optional[Dict[str, Any]] = None, + doc_type: Optional[str] = None, + is_scroll: Optional[bool] = False, + **kwargs +) -> DataFrame: + """Returns results matching query DSL as pandas dataframe. + + Parameters + ---------- + client : Elasticsearch + instance of elasticsearch.Elasticsearch to use. + index : str, optional + A comma-separated list of index names to search. + use `_all` or empty string to perform the operation on all indices. + search_body : Dict[str, Any], optional + The search definition using the [Query DSL](https://opensearch.org/docs/opensearch/query-dsl/full-text/). + doc_type : str, optional + Name of the document type (for Elasticsearch versions 5.x and earlier). 
+ is_scroll : bool, optional + Allows to retrieve a large numbers of results from a single search request using [scroll](https://opensearch.org/docs/opensearch/rest-api/scroll/) + for example, for machine learning jobs. + Because scroll search contexts consume a lot of memory, we suggest you don’t use the scroll operation for frequent user queries. + **kwargs : + KEYWORD arguments forwarded to [elasticsearch.Elasticsearch.search](https://elasticsearch-py.readthedocs.io/en/v7.13.4/api.html#elasticsearch.Elasticsearch.search). + If ``is_scroll=True`` arguments will be forwarded to [elasticsearch.helpers.scan](https://elasticsearch-py.readthedocs.io/en/master/helpers.html#scan) + + Returns + ------- + Union[pandas.DataFrame, Iterator[pandas.DataFrame]] + Results as Pandas DataFrame + + Examples + -------- + Searching an index using query DSL + + >>> import awswrangler as wr + >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT') + >>> df = wr.opensearch.search( + ... client=client, + ... index='movies', + ... search_body={ + ... "query": { + ... "match": { + ... "title": "wind" + ... } + ... } + ... } + ... ) + + + """ + if doc_type: + kwargs['doc_type'] = doc_type + if is_scroll: + # TODO: write logic + # documents = _scan(client, index, search_body, doc_type, **kwargs) + pass + else: + documents = client.search(index=index, body=search_body, **kwargs) + df = Select.from_dict(documents).to_pandas() + return df + + +def search_by_sql( + client: Elasticsearch, + sql_query: str +) -> DataFrame: + """Returns results matching [SQL query](https://opensearch.org/docs/search-plugins/sql/index/) as pandas dataframe + + Parameters + ---------- + client : Elasticsearch + instance of elasticsearch.Elasticsearch to use. + sql_query : str + SQL query + + Returns + ------- + Union[pandas.DataFrame, Iterator[pandas.DataFrame]] + Results as Pandas DataFrame + + Examples + -------- + Searching an index using SQL query + + >>> import awswrangler as wr + >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT') + >>> df = wr.opensearch.search_by_sql( + >>> client=client, + >>> sql_query='SELECT * FROM my-index LIMIT 50' + >>> ) + + + """ + # TODO: write logic diff --git a/awswrangler/opensearch/_utils.py b/awswrangler/opensearch/_utils.py index f2464bc80..1bbfeedf7 100644 --- a/awswrangler/opensearch/_utils.py +++ b/awswrangler/opensearch/_utils.py @@ -3,16 +3,41 @@ from typing import Optional import boto3 +import logging from awswrangler import _utils, exceptions -from elasticsearch import Elasticsearch +from elasticsearch import Elasticsearch, RequestsHttpConnection +from requests_aws4auth import AWS4Auth + + +_logger: logging.Logger = logging.getLogger(__name__) + + +def _get_distribution(client: Elasticsearch): + return client.info().get('version', {}).get('distribution', 'elasticsearch') + + +def _get_version(client: Elasticsearch): + return client.info().get('version', {}).get('number') + + +def _get_version_major(client: Elasticsearch): + version = _get_version(client) + if version: + return int(version.split('.')[0]) + return None def connect( host: str, - boto3_session: Optional[boto3.Session] = None + port: Optional[int] = 443, + boto3_session: Optional[boto3.Session] = boto3.Session(), + region: Optional[str] = None, + fgac_user: Optional[str] = None, + fgac_password: Optional[str] = None + ) -> Elasticsearch: - """Establishes a secure connection to the specified Amazon OpenSearch domain. + """Creates a secure connection to the specified Amazon OpenSearch domain. 
     Note
     ----
     We use [elasticsearch-py](https://elasticsearch-py.readthedocs.io/en/v7.13.4/), an Elasticsearch client for Python,
     version 7.13.4, which is the recommended version for best compatibility Amazon OpenSearch,
     since later versions may reject connections to Amazon OpenSearch clusters.
     In the future will move to a new open source client under the [OpenSearch project](https://www.opensearch.org/)
     You can read more here:
     https://aws.amazon.com/blogs/opensource/keeping-clients-of-opensearch-and-elasticsearch-compatible-with-open-source/
     https://opensearch.org/docs/clients/index/
 
+    The username and password are mandatory if the OpenSearch cluster uses [Fine Grained Access Control](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/fgac.html).
+    If fine-grained access control is disabled, the session access key and secret key are used instead.
+
     Parameters
     ----------
     host : str
         Amazon OpenSearch domain, for example: my-test-domain.us-east-1.es.amazonaws.com.
+    port : int, optional
+        OpenSearch Service only accepts connections over port 80 (HTTP) or 443 (HTTPS).
     boto3_session : boto3.Session(), optional
         Boto3 Session. The default boto3 Session will be used if boto3_session receive None.
+    region : str, optional
+        AWS region of the Amazon OpenSearch domain. If not provided, it is extracted from boto3_session.
+    fgac_user : str, optional
+        Fine-grained access control user. Mandatory if the cluster uses Fine Grained Access Control.
+    fgac_password : str, optional
+        Fine-grained access control password. Mandatory if the cluster uses Fine Grained Access Control.
 
     Returns
     -------
     elasticsearch.Elasticsearch
         Elasticsearch low-level client.
         https://elasticsearch-py.readthedocs.io/en/v7.13.4/api.html#elasticsearch
     """
-    pass  # connect to Amazon OpenSearch
+    valid_ports = {80, 443}
+
+    if port not in valid_ports:
+        raise ValueError("port must be one of %r." % valid_ports)
+
+    if fgac_user and fgac_password:
+        http_auth = (fgac_user, fgac_password)
+    else:
+        if region is None:
+            region = boto3_session.region_name
+        creds = boto3_session.get_credentials()
+        http_auth = AWS4Auth(
+            creds.access_key,
+            creds.secret_key,
+            region,
+            'es',
+            creds.token
+        )
+    try:
+        es = Elasticsearch(
+            host=host,
+            port=port,
+            http_auth=http_auth,
+            use_ssl=True,
+            verify_certs=True,
+            connection_class=RequestsHttpConnection
+        )
+    except Exception as e:
+        _logger.error("Error connecting to the OpenSearch cluster. 
Please verify authentication details") + raise e + return es diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index 0ea0ac537..b3d18e237 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -1,33 +1,65 @@ """Amazon OpenSearch Write Module (PRIVATE).""" -import json import logging +import uuid from pathlib import Path from typing import Any, Dict, List, Mapping, Optional, Union, Tuple, Iterable - -import boto3 +from ._utils import _get_distribution, _get_version_major import pandas as pd from elasticsearch import Elasticsearch +from elasticsearch.helpers import bulk _logger: logging.Logger = logging.getLogger(__name__) +def _selected_keys(document: Dict, keys_to_write: Optional[List[str]]): + if keys_to_write is None: + keys_to_write = document.keys() + keys_to_write = filter(lambda x: x != '_id', keys_to_write) + return {key: document[key] for key in keys_to_write } + + +def _actions_generator(documents: Union[Iterable[Dict[str, Any]], Iterable[Mapping[str, Any]]], + index: str, + doc_type: Optional[str], + keys_to_write: Optional[List[str]], + id_keys: Optional[List[str]]): + for document in documents: + if id_keys: + _id = '-'.join(list(map(lambda x: str(document[x]), id_keys))) + else: + _id = document.get('_id', uuid.uuid4()) + yield { + "_index": index, + "_type": doc_type, + "_id" : _id, + "_source": _selected_keys(document, keys_to_write), + } + + +def _df_doc_generator(df: pd.DataFrame): + df_iter = df.iterrows() + for i, document in df_iter: + yield document + + def create_index( + client: Elasticsearch, index: str, doc_type: Optional[str] = None, settings: Optional[Dict[str, Any]] = None, - mappings: Optional[Dict[str, Any]] = None, - boto3_session: Optional[boto3.Session] = None, - con: Optional[Elasticsearch] = None + mappings: Optional[Dict[str, Any]] = None ) -> Dict[str, Any]: """Creates an index. Parameters ---------- + client : Elasticsearch + instance of elasticsearch.Elasticsearch to use. index : str Name of the index. - doc_type : str + doc_type : str, optional Name of the document type (for Elasticsearch versions 5.x and earlier). settings : Dict[str, Any], optional Index settings @@ -35,10 +67,6 @@ def create_index( mappings : Dict[str, Any], optional Index mappings https://opensearch.org/docs/opensearch/rest-api/create-index/#mappings - boto3_session : boto3.Session(), optional - Boto3 Session. The default boto3 Session will be used if boto3_session receive None. - con : elasticsearch.Elasticsearch, optional - Elasticsearch client. A new connection will be established if con receive None. Returns ------- @@ -51,7 +79,9 @@ def create_index( Creating an index. >>> import awswrangler as wr + >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT') >>> response = wr.opensearch.create_index( + ... client=client, ... index="sample-index1", ... mappings={ ... 
"properties": { @@ -68,13 +98,28 @@ def create_index( """ + body = {} + if mappings: + if _get_distribution(client) == 'opensearch' or _get_version_major(client) >= 7: + body['mappings'] = mappings # doc type deprecated + else: + if doc_type: + body['mappings'] = {doc_type: mappings} + else: + body['mappings'] = {index: mappings} + if settings: + body['settings'] = settings + if body == {}: + body = None + return client.indices.create(index, body, ignore=[400, 404]) + def index_json( + client: Elasticsearch, path: Union[str, Path], index: str, doc_type: Optional[str] = None, bulk_params: Optional[Union[List[Any], Tuple[Any], Dict[Any, Any]]] = None, - boto3_session: Optional[boto3.Session] = None, **kwargs ) -> Dict[str, Any]: """Index all documents from JSON file to OpenSearch index. @@ -83,19 +128,19 @@ def index_json( Parameters ---------- + client : Elasticsearch + instance of elasticsearch.Elasticsearch to use. path : Union[str, Path] Path as str or Path object to the JSON file which contains the documents. index : str Name of the index. - doc_type : str + doc_type : str, optional Name of the document type (only for Elasticsearch versions 5.x and earlier). bulk_params : Union[List, Tuple, Dict], optional List of parameters to pass to bulk operation. References: elasticsearch >= 7.10.2 / opensearch: https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#url-parameters elasticsearch < 7.10.2: https://opendistro.github.io/for-elasticsearch-docs/docs/elasticsearch/rest-api-reference/#url-parameters - boto3_session : boto3.Session(), optional - Boto3 Session. The default boto3 Session will be used if boto3_session receive None. **kwargs : KEYWORD arguments forwarded to :func:`~awswrangler.opensearch.index_documents` which is used to execute the operation @@ -111,7 +156,9 @@ def index_json( Writing contents of JSON file >>> import awswrangler as wr + >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT') >>> wr.opensearch.index_json( + ... client=client, ... path='docs.json', ... index='sample-index1' ... ) @@ -122,24 +169,24 @@ def index_json( def index_csv( + client: Elasticsearch, path: Union[str, Path], index: str, doc_type: Optional[str] = None, - boto3_session: Optional[boto3.Session] = None, pandas_params: Optional[Dict[str, Any]] = None ) -> Dict[str, Any]: """Index all documents from a CSV file to OpenSearch index. Parameters ---------- + client : Elasticsearch + instance of elasticsearch.Elasticsearch to use. path : Union[str, Path] Path as str or Path object to the CSV file which contains the documents. index : str Name of the index. - doc_type : str + doc_type : str, optional Name of the document type (only for Elasticsearch versions 5.x and older). - boto3_session : boto3.Session(), optional - Boto3 Session. The default boto3 Session will be used if boto3_session receive None. pandas_params : Dictionary of arguments forwarded to pandas.read_csv(). e.g. pandas_kwargs={'sep': '|', 'na_values': ['null', 'none'], 'skip_blank_lines': True} @@ -156,7 +203,9 @@ def index_csv( Writing contents of CSV file >>> import awswrangler as wr + >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT') >>> wr.opensearch.index_csv( + ... client=client, ... path='docs.csv', ... index='sample-index1' ... ) @@ -164,7 +213,9 @@ def index_csv( Writing contents of CSV file using pandas_kwargs >>> import awswrangler as wr + >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT') >>> wr.opensearch.index_csv( + ... client=client, ... path='docs.csv', ... index='sample-index1', ... 
pandas_params={'sep': '|', 'na_values': ['null', 'none'], 'skip_blank_lines': True} @@ -174,23 +225,27 @@ def index_csv( def index_df( + client: Elasticsearch, df: pd.DataFrame, index: str, doc_type: Optional[str] = None, - boto3_session: Optional[boto3.Session] = None, + **kwargs ) -> Dict[str, Any]: """Index all documents from a DataFrame to OpenSearch index. Parameters ---------- + client : Elasticsearch + instance of elasticsearch.Elasticsearch to use. df : pd.DataFrame Pandas DataFrame https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html index : str Name of the index. - doc_type : str + doc_type : str, optional Name of the document type (only for Elasticsearch versions 5.x and older). - boto3_session : boto3.Session(), optional - Boto3 Session. The default boto3 Session will be used if boto3_session receive None. + **kwargs : + KEYWORD arguments forwarded to :func:`~awswrangler.opensearch.index_documents` + which is used to execute the operation Returns ------- @@ -204,20 +259,30 @@ def index_df( >>> import awswrangler as wr >>> import pandas as pd + >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT') >>> wr.opensearch.index_df( + ... client=client, ... df=pd.DataFrame([{'_id': '1'}, {'_id': '2'}, {'_id': '3'}]), ... index='sample-index1' ... ) """ - pass # TODO: load data from dataframe + + return index_documents( + client=client, + documents=_df_doc_generator(df), + index=index, + doc_type=doc_type, + **kwargs + ) def index_documents( + client: Elasticsearch, documents: Union[Iterable[Dict[str, Any]], Iterable[Mapping[str, Any]]], index: str, doc_type: Optional[str] = None, - boto3_session: Optional[boto3.Session] = None, - con: Optional[Elasticsearch] = None, + keys_to_write: Optional[List[str]] = None, + id_keys: Optional[List[str]] = None, ignore_status: Optional[Union[List[Any], Tuple[Any]]] = None, chunk_size: Optional[int] = 500, max_chunk_bytes: Optional[int] = 100 * 1024 * 1024, @@ -237,16 +302,19 @@ def index_documents( Parameters ---------- + client : Elasticsearch + instance of elasticsearch.Elasticsearch to use. documents : Union[Iterable[Dict[str, Any]], Iterable[Mapping[str, Any]]] List which contains the documents that will be inserted. index : str Name of the index. - doc_type : str + doc_type : str, optional Name of the document type (only for Elasticsearch versions 5.x and older). - boto3_session : boto3.Session(), optional - Boto3 Session. The default boto3 Session will be used if boto3_session receive None. - con : elasticsearch.Elasticsearch, optional - Elasticsearch client. A new connection will be established if con receive None. + keys_to_write : List[str], optional + list of keys to index. If not provided all keys will be indexed + id_keys : List[str], optional + list of keys that compound document unique id. If not provided will use `_id` key if exists, + otherwise will generate unique identifier for each document. ignore_status: Union[List[Any], Tuple[Any]], optional list of HTTP status codes that you want to ignore (not raising an exception) chunk_size : int, optional @@ -277,9 +345,24 @@ def index_documents( Writing documents >>> import awswrangler as wr + >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT') >>> wr.opensearch.index_documents( ... documents=[{'_id': '1', 'value': 'foo'}, {'_id': '2', 'value': 'bar'}], ... index='sample-index1' ... 
) """ - pass # TODO: load documents + success, errors = bulk( + client=client, + actions=_actions_generator(documents, index, doc_type, keys_to_write=keys_to_write, id_keys=id_keys), + ignore_status=ignore_status, + chunk_size=chunk_size, + max_chunk_bytes=max_chunk_bytes, + max_retries=max_retries, + initial_backoff=initial_backoff, + max_backoff=max_backoff, + **kwargs + ) + return { + 'success': success, + 'errors': errors + } diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py new file mode 100644 index 000000000..01f30084d --- /dev/null +++ b/tests/test_opensearch.py @@ -0,0 +1,71 @@ +import logging + +import boto3 +import pandas as pd + + +import awswrangler as wr + + +logging.getLogger("awswrangler").setLevel(logging.DEBUG) + +# TODO: create test_infra for opensearch +OPENSEARCH_DOMAIN = 'search-es71-public-z63iyqxccc4ungar5vx45xwgfi.us-east-1.es.amazonaws.com' # change to your domain +OPENSEARCH_DOMAIN_FGAC = 'search-os1-public-urixc6vui2il7oawwiox2e57n4.us-east-1.es.amazonaws.com' + + +def test_connection(): + client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) + print(client.info()) + + +# def test_fgac_connection(): +# client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN_FGAC, +# fgac_user='admin', +# fgac_password='SECRET') +# print(client.info()) + + +def test_create_index(): + client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) + response = wr.opensearch.create_index( + client, + index='test-index1', + mappings={ + 'properties': { + 'name': {'type': 'text'}, + 'age': {'type': 'integer'} + } + }, + settings={ + 'index': { + 'number_of_shards': 1, + 'number_of_replicas': 1 + } + } + ) + print(response) + + +def test_index_df(): + client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) + response = wr.opensearch.index_df(client, + df=pd.DataFrame([{'_id': '1', 'name': 'John'}, + {'_id': '2', 'name': 'George'}, + {'_id': '3', 'name': 'Julia'} + ]), + index='test_index_df1' + ) + print(response) + + +def test_index_documents(): + client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) + response = wr.opensearch.index_documents(client, + documents=[{'_id': '1', 'name': 'John'}, + {'_id': '2', 'name': 'George'}, + {'_id': '3', 'name': 'Julia'} + ], + index='test_index_documents1' + ) + print(response) \ No newline at end of file From 4e8f4e3c725af0a37a6b0b4c749cbd92f882f51a Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Tue, 14 Sep 2021 11:41:15 -0400 Subject: [PATCH 04/41] [skip ci] fixed filter_path pandasticsearch issue --- awswrangler/opensearch/_read.py | 6 ++- tests/test_opensearch.py | 81 ++++++++++++++++++++++++++++++++- 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/awswrangler/opensearch/_read.py b/awswrangler/opensearch/_read.py index e1e735828..597005b92 100644 --- a/awswrangler/opensearch/_read.py +++ b/awswrangler/opensearch/_read.py @@ -45,7 +45,6 @@ def search( Because scroll search contexts consume a lot of memory, we suggest you don’t use the scroll operation for frequent user queries. **kwargs : KEYWORD arguments forwarded to [elasticsearch.Elasticsearch.search](https://elasticsearch-py.readthedocs.io/en/v7.13.4/api.html#elasticsearch.Elasticsearch.search). 
- If ``is_scroll=True`` arguments will be forwarded to [elasticsearch.helpers.scan](https://elasticsearch-py.readthedocs.io/en/master/helpers.html#scan) Returns ------- @@ -74,6 +73,11 @@ def search( """ if doc_type: kwargs['doc_type'] = doc_type + + # pandasticsearch.Select.from_dict requires `took` field + if 'filter_path' in kwargs: + if 'took' not in kwargs['filter_path']: + kwargs['filter_path'].append('took') if is_scroll: # TODO: write logic # documents = _scan(client, index, search_body, doc_type, **kwargs) diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index 01f30084d..08a1a947c 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -14,6 +14,15 @@ OPENSEARCH_DOMAIN_FGAC = 'search-os1-public-urixc6vui2il7oawwiox2e57n4.us-east-1.es.amazonaws.com' +inspections_documents = [ +{"business_address":"315 California St","business_city":"San Francisco","business_id":"24936","business_latitude":"37.793199","business_location":{"lon": -122.400152,"lat": 37.793199},"business_longitude":"-122.400152","business_name":"San Francisco Soup Company","business_postal_code":"94104","business_state":"CA","inspection_date":"2016-06-09T00:00:00.000","inspection_id":"24936_20160609","inspection_score":77,"inspection_type":"Routine - Unscheduled","risk_category":"Low Risk","violation_description":"Improper food labeling or menu misrepresentation","violation_id":"24936_20160609_103141"}, +{"business_address":"10 Mason St","business_city":"San Francisco","business_id":"60354","business_latitude":"37.783527","business_location":{"lon": -122.409061,"lat": 37.783527},"business_longitude":"-122.409061","business_name":"Soup Unlimited","business_postal_code":"94102","business_state":"CA","inspection_date":"2016-11-23T00:00:00.000","inspection_id":"60354_20161123","inspection_type":"Routine", "inspection_score": 95}, +{"business_address":"2872 24th St","business_city":"San Francisco","business_id":"1797","business_latitude":"37.752807","business_location":{"lon": -122.409752,"lat": 37.752807},"business_longitude":"-122.409752","business_name":"TIO CHILOS GRILL","business_postal_code":"94110","business_state":"CA","inspection_date":"2016-07-05T00:00:00.000","inspection_id":"1797_20160705","inspection_score":90,"inspection_type":"Routine - Unscheduled","risk_category":"Low Risk","violation_description":"Unclean nonfood contact surfaces","violation_id":"1797_20160705_103142"}, +{"business_address":"1661 Tennessee St Suite 3B","business_city":"San Francisco Whard Restaurant","business_id":"66198","business_latitude":"37.75072","business_location":{"lon": -122.388478,"lat": 37.75072},"business_longitude":"-122.388478","business_name":"San Francisco Restaurant","business_postal_code":"94107","business_state":"CA","inspection_date":"2016-05-27T00:00:00.000","inspection_id":"66198_20160527","inspection_type":"Routine","inspection_score":56 }, +{"business_address":"2162 24th Ave","business_city":"San Francisco","business_id":"5794","business_latitude":"37.747228","business_location":{"lon": -122.481299,"lat": 37.747228},"business_longitude":"-122.481299","business_name":"Soup House","business_phone_number":"+14155752700","business_postal_code":"94116","business_state":"CA","inspection_date":"2016-09-07T00:00:00.000","inspection_id":"5794_20160907","inspection_score":96,"inspection_type":"Routine - Unscheduled","risk_category":"Low Risk","violation_description":"Unapproved or unmaintained equipment or utensils","violation_id":"5794_20160907_103144"}, +{"business_address":"2162 24th 
Ave","business_city":"San Francisco","business_id":"5794","business_latitude":"37.747228","business_location":{"lon": -122.481299,"lat": 37.747228},"business_longitude":"-122.481299","business_name":"Soup-or-Salad","business_phone_number":"+14155752700","business_postal_code":"94116","business_state":"CA","inspection_date":"2016-09-07T00:00:00.000","inspection_id":"5794_20160907","inspection_score":96,"inspection_type":"Routine - Unscheduled","risk_category":"Low Risk","violation_description":"Unapproved or unmaintained equipment or utensils","violation_id":"5794_20160907_103144"} +] + def test_connection(): client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) print(client.info()) @@ -68,4 +77,74 @@ def test_index_documents(): ], index='test_index_documents1' ) - print(response) \ No newline at end of file + print(response) + + +def test_index_documents_id_keys(): + client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) + response = wr.opensearch.index_documents(client, + documents=inspections_documents, + index='test_index_documents_id_keys', + id_keys=['inspection_id'] + ) + print(response) + + +def test_index_documents_no_id_keys(): + client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) + response = wr.opensearch.index_documents(client, + documents=inspections_documents, + index='test_index_documents_no_id_keys' + ) + print(response) + + +def test_search(): + index = 'test_search' + client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) + response = wr.opensearch.index_documents(client, + documents=inspections_documents, + index=index, + id_keys=['inspection_id'] + ) + df = wr.opensearch.search( + client, + index=index, + search_body={ + "query": { + "match": { + "business_name": "soup" + } + } + }, + _source=['inspection_id', 'business_name', 'business_location'] + ) + + print('') + print(df.to_string()) + + +def test_search_filter_path(): + index = 'test_search' + client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) + response = wr.opensearch.index_documents(client, + documents=inspections_documents, + index=index, + id_keys=['inspection_id'] + ) + df = wr.opensearch.search( + client, + index=index, + search_body={ + "query": { + "match": { + "business_name": "soup" + } + } + }, + _source=['inspection_id', 'business_name', 'business_location'], + filter_path=['hits.hits._source'] + ) + + print('') + print(df.to_string()) \ No newline at end of file From 7a010cd746e3f5401e5fa9f062731a9e5c71c9d6 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Tue, 14 Sep 2021 15:53:55 -0400 Subject: [PATCH 05/41] [skip ci] disable scan for now --- awswrangler/opensearch/_read.py | 16 ++-------------- tests/test_opensearch.py | 2 +- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/awswrangler/opensearch/_read.py b/awswrangler/opensearch/_read.py index 597005b92..c20871713 100644 --- a/awswrangler/opensearch/_read.py +++ b/awswrangler/opensearch/_read.py @@ -6,18 +6,6 @@ from elasticsearch.helpers import scan -def _scan( - client: Elasticsearch, - index: Optional[str] = '_all', - search_body: Optional[Dict[str, Any]] = None, - doc_type: Optional[str] = None, - scroll: Optional[str] = '10m', - **kwargs -): - # TODO: write logic based on https://elasticsearch-py.readthedocs.io/en/master/helpers.html#scan - pass - - def search( client: Elasticsearch, index: Optional[str] = '_all', @@ -79,8 +67,7 @@ def search( if 'took' not in kwargs['filter_path']: kwargs['filter_path'].append('took') if is_scroll: - # TODO: write logic - # documents = _scan(client, index, search_body, doc_type, 
**kwargs) + # TODO: write logic based on https://elasticsearch-py.readthedocs.io/en/master/helpers.html#scan pass else: documents = client.search(index=index, body=search_body, **kwargs) @@ -120,3 +107,4 @@ def search_by_sql( """ # TODO: write logic + pass diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index 08a1a947c..b3f29a88e 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -147,4 +147,4 @@ def test_search_filter_path(): ) print('') - print(df.to_string()) \ No newline at end of file + print(df.to_string()) From 79e0a9a8199d401ed4808a799c7a8062dd386d4b Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Wed, 15 Sep 2021 10:49:31 -0400 Subject: [PATCH 06/41] [skip ci] path documentation --- awswrangler/opensearch/_write.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index b3d18e237..05fa75f0c 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -131,7 +131,7 @@ def index_json( client : Elasticsearch instance of elasticsearch.Elasticsearch to use. path : Union[str, Path] - Path as str or Path object to the JSON file which contains the documents. + s3 or local path to the JSON file which contains the documents. index : str Name of the index. doc_type : str, optional @@ -173,7 +173,7 @@ def index_csv( path: Union[str, Path], index: str, doc_type: Optional[str] = None, - pandas_params: Optional[Dict[str, Any]] = None + pandas_kwargs: Optional[Dict[str, Any]] = None ) -> Dict[str, Any]: """Index all documents from a CSV file to OpenSearch index. @@ -182,12 +182,12 @@ def index_csv( client : Elasticsearch instance of elasticsearch.Elasticsearch to use. path : Union[str, Path] - Path as str or Path object to the CSV file which contains the documents. + s3 or local path to the CSV file which contains the documents. index : str Name of the index. doc_type : str, optional Name of the document type (only for Elasticsearch versions 5.x and older). - pandas_params : + pandas_kwargs : Dictionary of arguments forwarded to pandas.read_csv(). e.g. pandas_kwargs={'sep': '|', 'na_values': ['null', 'none'], 'skip_blank_lines': True} https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html @@ -218,7 +218,7 @@ def index_csv( ... client=client, ... path='docs.csv', ... index='sample-index1', - ... pandas_params={'sep': '|', 'na_values': ['null', 'none'], 'skip_blank_lines': True} + ... pandas_kwargs={'sep': '|', 'na_values': ['null', 'none'], 'skip_blank_lines': True} ... 
)
    """
    pass  # TODO: load data from csv file

From f07e698590d6b75f2143ea6d1ecdd61e48a05785 Mon Sep 17 00:00:00 2001
From: Assaf Mentzer
Date: Wed, 15 Sep 2021 11:06:03 -0400
Subject: [PATCH 07/41] [skip ci] add delete_index

---
 awswrangler/opensearch/__init__.py |  3 +-
 awswrangler/opensearch/_write.py   | 44 +++++++++++++++++++++++++++++-
 tests/test_opensearch.py           | 14 ++++++++++
 3 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/awswrangler/opensearch/__init__.py b/awswrangler/opensearch/__init__.py
index cd7184e79..222519747 100644
--- a/awswrangler/opensearch/__init__.py
+++ b/awswrangler/opensearch/__init__.py
@@ -1,11 +1,12 @@
 """Utilities Module for Amazon OpenSearch."""

 from awswrangler.opensearch._utils import connect
-from awswrangler.opensearch._write import create_index, index_csv, index_documents, index_df, index_json
+from awswrangler.opensearch._write import create_index, delete_index, index_csv, index_documents, index_df, index_json
 from awswrangler.opensearch._read import search, search_by_sql

 __all__ = ["connect",
            "create_index",
+           "delete_index",
            "index_csv",
            "index_documents",
            "index_df",

diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py
index 05fa75f0c..8f629fcbf 100644
--- a/awswrangler/opensearch/_write.py
+++ b/awswrangler/opensearch/_write.py
@@ -111,7 +111,49 @@ def create_index(
         body['settings'] = settings
     if body == {}:
         body = None
-    return client.indices.create(index, body, ignore=[400, 404])
+    response = client.indices.create(index, body, ignore=[400, 404])
+    if 'error' in response:
+        _logger.warning(response)
+        if str(response['error']).startswith(u'MapperParsingException'):
+            raise ValueError(response['error'])
+    return response
+
+
+def delete_index(
+    client: Elasticsearch,
+    index: str
+) -> Dict[str, Any]:
+    """Delete an index.
+
+    Parameters
+    ----------
+    client : Elasticsearch
+        instance of elasticsearch.Elasticsearch to use.
+    index : str
+        Name of the index.
+
+    Returns
+    -------
+    Dict[str, Any]
+        OpenSearch rest api response
+        https://opensearch.org/docs/opensearch/rest-api/create-index/#response.
+
+    Examples
+    --------
+    Deleting an index.
+
+    >>> import awswrangler as wr
+    >>> client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')
+    >>> response = wr.opensearch.delete_index(
+    ...     client=client,
+    ...     index="sample-index1"
+    ...
) + + """ + response = client.indices.delete(index, ignore=[400, 404]) + if 'error' in response: + _logger.warning(response) + return response def index_json( diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index b3f29a88e..2bcc95fa5 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -56,6 +56,20 @@ def test_create_index(): print(response) +def test_delete_index(): + index = 'test_delete_index' + client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) + wr.opensearch.create_index( + client, + index=index + ) + response = wr.opensearch.delete_index( + client, + index=index + ) + print(response) + + def test_index_df(): client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) response = wr.opensearch.index_df(client, From 7d7318b8d6a04227bc4f16205e6aa614b44487ae Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Wed, 15 Sep 2021 11:08:01 -0400 Subject: [PATCH 08/41] [skip ci] add delete_index --- awswrangler/opensearch/_write.py | 1 - 1 file changed, 1 deletion(-) diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index 8f629fcbf..7fda11578 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -136,7 +136,6 @@ def delete_index( ------- Dict[str, Any] OpenSearch rest api response - https://opensearch.org/docs/opensearch/rest-api/create-index/#response. Examples -------- From 6b90c936cc7c5d4ad6ef604844f61da5eef85948 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Wed, 15 Sep 2021 13:59:33 -0400 Subject: [PATCH 09/41] [skip ci] add index_json --- awswrangler/opensearch/_write.py | 41 +++++++++++++++++++++++++------- tests/test_opensearch.py | 35 +++++++++++++++++++++++++-- 2 files changed, 66 insertions(+), 10 deletions(-) diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index 7fda11578..5a606354f 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -2,9 +2,12 @@ import logging import uuid +import boto3 +import json from pathlib import Path from typing import Any, Dict, List, Mapping, Optional, Union, Tuple, Iterable -from ._utils import _get_distribution, _get_version_major +from awswrangler.opensearch._utils import _get_distribution, _get_version_major +from awswrangler._utils import parse_path import pandas as pd from elasticsearch import Elasticsearch @@ -44,6 +47,15 @@ def _df_doc_generator(df: pd.DataFrame): yield document +def _file_line_generator(path: str, is_json: bool = False): + with open(path) as fp: + for line in fp: + if is_json: + yield json.loads(line) + else: + yield line.strip() + + def create_index( client: Elasticsearch, index: str, @@ -160,7 +172,7 @@ def index_json( path: Union[str, Path], index: str, doc_type: Optional[str] = None, - bulk_params: Optional[Union[List[Any], Tuple[Any], Dict[Any, Any]]] = None, + boto3_session: Optional[boto3.Session] = boto3.Session(), **kwargs ) -> Dict[str, Any]: """Index all documents from JSON file to OpenSearch index. @@ -177,11 +189,9 @@ def index_json( Name of the index. doc_type : str, optional Name of the document type (only for Elasticsearch versions 5.x and earlier). - bulk_params : Union[List, Tuple, Dict], optional - List of parameters to pass to bulk operation. 
From 6b90c936cc7c5d4ad6ef604844f61da5eef85948 Mon Sep 17 00:00:00 2001
From: Assaf Mentzer
Date: Wed, 15 Sep 2021 13:59:33 -0400
Subject: [PATCH 09/41] [skip ci] add index_json

---
 awswrangler/opensearch/_write.py | 41 +++++++++++++++++++++++++-------
 tests/test_opensearch.py         | 35 +++++++++++++++++++++++++--
 2 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py
index 7fda11578..5a606354f 100644
--- a/awswrangler/opensearch/_write.py
+++ b/awswrangler/opensearch/_write.py
@@ -2,9 +2,12 @@

 import logging
 import uuid
+import boto3
+import json
 from pathlib import Path
 from typing import Any, Dict, List, Mapping, Optional, Union, Tuple, Iterable
-from ._utils import _get_distribution, _get_version_major
+from awswrangler.opensearch._utils import _get_distribution, _get_version_major
+from awswrangler._utils import parse_path
 import pandas as pd

 from elasticsearch import Elasticsearch
 from elasticsearch.helpers import bulk
@@ -44,6 +47,15 @@ def _df_doc_generator(df: pd.DataFrame):
         yield document


+def _file_line_generator(path: str, is_json: bool = False):
+    with open(path) as fp:
+        for line in fp:
+            if is_json:
+                yield json.loads(line)
+            else:
+                yield line.strip()
+
+
 def create_index(
     client: Elasticsearch,
     index: str,
@@ -160,7 +172,7 @@ def index_json(
     path: Union[str, Path],
     index: str,
     doc_type: Optional[str] = None,
-    bulk_params: Optional[Union[List[Any], Tuple[Any], Dict[Any, Any]]] = None,
+    boto3_session: Optional[boto3.Session] = None,
     **kwargs
 ) -> Dict[str, Any]:
     """Index all documents from JSON file to OpenSearch index.
...
         Name of the index.
     doc_type : str, optional
         Name of the document type (only for Elasticsearch versions 5.x and earlier).
-    bulk_params : Union[List, Tuple, Dict], optional
-        List of parameters to pass to bulk operation.
-        References:
-        elasticsearch >= 7.10.2 / opensearch: https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#url-parameters
-        elasticsearch < 7.10.2: https://opendistro.github.io/for-elasticsearch-docs/docs/elasticsearch/rest-api-reference/#url-parameters
+    boto3_session : boto3.Session(), optional
+        Boto3 Session to be used to access s3 if s3 path is provided.
+        The default boto3 Session will be used if boto3_session receives None.
     **kwargs :
         KEYWORD arguments forwarded to :func:`~awswrangler.opensearch.index_documents`
         which is used to execute the operation
...
     """
     # Loading data from file
-    pass  # TODO: load data from json file
+    if path.startswith("s3://"):
+        bucket, key = parse_path(path)
+        s3 = (boto3_session or boto3.Session()).client('s3')
+        obj = s3.get_object(Bucket=bucket, Key=key)
+        body = obj['Body'].read()
+        lines = body.splitlines()
+        documents = map(lambda x: json.loads(x), lines)
+    else:  # local path
+        documents = _file_line_generator(path, is_json=True)
+    return index_documents(
+        client=client,
+        documents=documents,
+        index=index,
+        doc_type=doc_type,
+        **kwargs
+    )


 def index_csv(
diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py
index 2bcc95fa5..a4398a68b 100644
--- a/tests/test_opensearch.py
+++ b/tests/test_opensearch.py
@@ -2,7 +2,7 @@

 import boto3
 import pandas as pd
-
+import json
 import awswrangler as wr
@@ -12,7 +12,7 @@

 # TODO: create test_infra for opensearch
 OPENSEARCH_DOMAIN = 'search-es71-public-z63iyqxccc4ungar5vx45xwgfi.us-east-1.es.amazonaws.com'  # change to your domain
 OPENSEARCH_DOMAIN_FGAC = 'search-os1-public-urixc6vui2il7oawwiox2e57n4.us-east-1.es.amazonaws.com'
-
+BUCKET = 'mentzera'

 inspections_documents = [
 {"business_address":"315 California St","business_city":"San Francisco","business_id":"24936","business_latitude":"37.793199","business_location":{"lon": -122.400152,"lat": 37.793199},"business_longitude":"-122.400152","business_name":"San Francisco Soup Company","business_postal_code":"94104","business_state":"CA","inspection_date":"2016-06-09T00:00:00.000","inspection_id":"24936_20160609","inspection_score":77,"inspection_type":"Routine - Unscheduled","risk_category":"Low Risk","violation_description":"Improper food labeling or menu misrepresentation","violation_id":"24936_20160609_103141"},
@@ -162,3 +162,34 @@ def test_search_filter_path():

     print('')
     print(df.to_string())
+
+
+def test_index_json_local():
+    file_path = '/tmp/inspections.json'
+    client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN)
+    with open(file_path, 'w') as filehandle:
+        for doc in inspections_documents:
+            filehandle.write('%s\n' % json.dumps(doc))
+    response = wr.opensearch.index_json(
+        client,
+        index='test_index_json_local',
+        path=file_path
+    )
+    print(response)
+
+
+def test_index_json_s3():
+    file_path = '/tmp/inspections.json'
+    s3_key = 'tmp/inspections.json'
+    client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN)
+    with open(file_path, 'w') as filehandle:
+        for doc in inspections_documents:
+            filehandle.write('%s\n' % json.dumps(doc))
+    s3 = boto3.client('s3')
+    s3.upload_file(file_path, BUCKET, s3_key)
+    response = wr.opensearch.index_json(
+        client,
+        index='test_index_json_s3',
+        path=f's3://{BUCKET}/{s3_key}'
+    )
+    print(response)
\ No newline at end of file
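Editor's note: a short sketch of the JSON-Lines contract that index_json relies on (toy documents, hypothetical endpoint). Both branches assume one standalone JSON object per line - _file_line_generator parses line by line locally, and the S3 branch splits the object body with splitlines():

import json
import awswrangler as wr

docs = [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}]
with open('/tmp/docs.json', 'w') as f:
    for doc in docs:
        f.write(json.dumps(doc) + '\n')  # one JSON object per line, no enclosing array

client = wr.opensearch.connect(host='my-test-domain.us-east-1.es.amazonaws.com')  # hypothetical endpoint
response = wr.opensearch.index_json(client, path='/tmp/docs.json', index='docs')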
From 73db6f51d9cac8c2aab675deabb9b146197cf8ff Mon Sep 17 00:00:00 2001
From: Assaf Mentzer
Date: Wed, 15 Sep 2021 17:13:18 -0400
Subject: [PATCH 10/41] [skip ci] add index_csv local path

---
 awswrangler/opensearch/_write.py | 43 +++++++++++++++++++++++++++++---
 tests/test_opensearch.py         | 15 +++++++++++
 2 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py
index 5a606354f..ad845c2b1 100644
--- a/awswrangler/opensearch/_write.py
+++ b/awswrangler/opensearch/_write.py
@@ -4,11 +4,13 @@

 import uuid
 import boto3
 import json
+import ast
 from pathlib import Path
 from typing import Any, Dict, List, Mapping, Optional, Union, Tuple, Iterable
 from awswrangler.opensearch._utils import _get_distribution, _get_version_major
 from awswrangler._utils import parse_path
 import pandas as pd
+from pandas import notna

 from elasticsearch import Elasticsearch
 from elasticsearch.helpers import bulk
@@ -42,9 +44,28 @@ def _actions_generator(documents: Union[Iterable[Dict[str, Any]], Iterable[Mappi

 def _df_doc_generator(df: pd.DataFrame):
+    def _deserialize(v):
+        if isinstance(v, str):
+            v = v.strip()
+            if (v.startswith('{') and v.endswith('}')
+                    or v.startswith('[') and v.endswith(']')):
+                try:
+                    v = json.loads(v)
+                except Exception as e:
+                    try:
+                        v = ast.literal_eval(v)  # if properties are enclosed with single quotes
+                    except Exception:
+                        _logger.warning(f'could not convert string to json: {v}')
+                        _logger.warning(e)
+        return v
+
     df_iter = df.iterrows()
     for i, document in df_iter:
-        yield document
+        yield {k: _deserialize(v) for k, v in document.items() if notna(v)}


 def _file_line_generator(path: str, is_json: bool = False):
@@ -239,7 +260,8 @@ def index_csv(
     path: Union[str, Path],
     index: str,
     doc_type: Optional[str] = None,
-    pandas_kwargs: Optional[Dict[str, Any]] = None
+    pandas_kwargs: Optional[Dict[str, Any]] = None,
+    **kwargs
 ) -> Dict[str, Any]:
     """Index all documents from a CSV file to OpenSearch index.
...
     pandas_kwargs : Dict[str, Any], optional
         Dictionary of arguments forwarded to pandas.read_csv().
         e.g. pandas_kwargs={'sep': '|', 'na_values': ['null', 'none'], 'skip_blank_lines': True}
         https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
+    **kwargs :
+        KEYWORD arguments forwarded to :func:`~awswrangler.opensearch.index_documents`
+        which is used to execute the operation
...
    >>> wr.opensearch.index_csv(
    ...     client,
    ...     path='docs.csv',
    ...     index='sample-index1',
    ...     pandas_kwargs={'sep': '|', 'na_values': ['null', 'none'], 'skip_blank_lines': True}
    ... )
    """
-    pass  # TODO: load data from csv file
+    custom_pandas_params = {
+        'skip_blank_lines': True,
+        'na_filter': True  # will generate NaN value for empty cells. We remove NaN keys in _df_doc_generator
+    }
+    pandas_kwargs = {**(pandas_kwargs or {}), **custom_pandas_params}
+    df = pd.read_csv(path, **pandas_kwargs)
+    return index_df(
+        client,
+        df=df,
+        index=index,
+        doc_type=doc_type,
+        **kwargs
+    )


 def index_df(
diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py
index a4398a68b..40f83eae6 100644
--- a/tests/test_opensearch.py
+++ b/tests/test_opensearch.py
@@ -192,4 +192,19 @@ def test_index_json_s3():
         index='test_index_json_s3',
         path=f's3://{BUCKET}/{s3_key}'
     )
+    print(response)
+
+
+def test_index_csv_local():
+    file_path = '/tmp/inspections.csv'
+    index = 'test_index_csv_local'
+    df = pd.DataFrame(inspections_documents)
+    df.to_csv(file_path, index=False)
+    client = wr.opensearch.connect(OPENSEARCH_DOMAIN)
+    wr.opensearch.delete_index(client, index)
+    response = wr.opensearch.index_csv(
+        client,
+        path=file_path,
+        index=index
+    )
+    print(response)
\ No newline at end of file
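Editor's note: why the _deserialize step in patch 10 matters. pandas stringifies nested objects on the way to CSV, and the resulting dict repr uses single quotes, which is exactly why the code falls back from json.loads to ast.literal_eval. A self-contained illustration with made-up data:

import ast
import pandas as pd

df = pd.DataFrame([{'name': 'x', 'location': {'lat': 37.79, 'lon': -122.4}}])
df.to_csv('/tmp/x.csv', index=False)
cell = pd.read_csv('/tmp/x.csv')['location'][0]
print(cell)                    # "{'lat': 37.79, 'lon': -122.4}" - a string now, not a dict
# json.loads(cell) raises ValueError because of the single quotes,
# but ast.literal_eval recovers the original dict, mirroring _deserialize:
print(ast.literal_eval(cell))  # {'lat': 37.79, 'lon': -122.4}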
From 15d8aca40bc9e66df8fcb96d0b9d06c33d42db7d Mon Sep 17 00:00:00 2001
From: Assaf Mentzer
Date: Fri, 17 Sep 2021 12:38:03 -0400
Subject: [PATCH 11/41] [skip ci] add is_scroll to search (scan)

---
 awswrangler/opensearch/_read.py | 17 +++++++++++++----
 tests/test_opensearch.py        | 26 ++++++++++++++++++++++++--
 2 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/awswrangler/opensearch/_read.py b/awswrangler/opensearch/_read.py
index c20871713..0f82f5964 100644
--- a/awswrangler/opensearch/_read.py
+++ b/awswrangler/opensearch/_read.py
@@ -4,6 +4,7 @@

 from typing import Any, Dict, Optional
 from elasticsearch import Elasticsearch
 from elasticsearch.helpers import scan
+import pandas as pd


 def search(
@@ -32,7 +33,8 @@ def search(
        for example, for machine learning jobs.
        Because scroll search contexts consume a lot of memory, we suggest you don’t use the scroll operation
        for frequent user queries.
     **kwargs :
-        KEYWORD arguments forwarded to [elasticsearch.Elasticsearch.search](https://elasticsearch-py.readthedocs.io/en/v7.13.4/api.html#elasticsearch.Elasticsearch.search).
+        KEYWORD arguments forwarded to [elasticsearch.Elasticsearch.search](https://elasticsearch-py.readthedocs.io/en/v7.13.4/api.html#elasticsearch.Elasticsearch.search)
+        and also to [elasticsearch.helpers.scan](https://elasticsearch-py.readthedocs.io/en/master/helpers.html#scan) if `is_scroll=True`

     Returns
     -------
@@ -67,11 +69,18 @@ def search(
     if 'took' not in kwargs['filter_path']:
         kwargs['filter_path'].append('took')
     if is_scroll:
-        # TODO: write logic based on https://elasticsearch-py.readthedocs.io/en/master/helpers.html#scan
-        pass
+        documents_generator = scan(
+            client,
+            index=index,
+            query=search_body,
+            **kwargs
+        )
+        s = Select()
+        documents = map(lambda x: s.hit_to_row(x), documents_generator)
+        df = pd.DataFrame(documents)
     else:
         documents = client.search(index=index, body=search_body, **kwargs)
-    df = Select.from_dict(documents).to_pandas()
+        df = Select.from_dict(documents).to_pandas()
     return df
diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py
index 40f83eae6..6152da7d8 100644
--- a/tests/test_opensearch.py
+++ b/tests/test_opensearch.py
@@ -119,7 +119,8 @@ def test_search():
     response = wr.opensearch.index_documents(client,
                                              documents=inspections_documents,
                                              index=index,
-                                             id_keys=['inspection_id']
+                                             id_keys=['inspection_id'],
+                                             refresh='wait_for'
                                              )
     df = wr.opensearch.search(
         client,
@@ -144,7 +145,8 @@ def test_search_filter_path():
     response = wr.opensearch.index_documents(client,
                                              documents=inspections_documents,
                                              index=index,
-                                             id_keys=['inspection_id']
+                                             id_keys=['inspection_id'],
+                                             refresh='wait_for'
                                              )
     df = wr.opensearch.search(
         client,
@@ -164,6 +166,26 @@ def test_search_filter_path():

     print('')
     print(df.to_string())
+
+
+def test_search_scroll():
+    index = 'test_search_scroll'
+    client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN)
+    response = wr.opensearch.index_documents(client,
+                                             documents=inspections_documents,
+                                             index=index,
+                                             id_keys=['inspection_id'],
+                                             refresh='wait_for'
+                                             )
+    df = wr.opensearch.search(
+        client,
+        index=index,
+        is_scroll=True,
+        _source=['inspection_id', 'business_name', 'business_location']
+    )
+
+    print('')
+    print(df.to_string())
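Editor's note: the is_scroll branch above delegates to elasticsearch.helpers.scan, which keeps a server-side cursor and pages through hits instead of returning a single capped response. A hedged usage sketch (hypothetical endpoint and index; search_body as used in the function body):

import awswrangler as wr

client = wr.opensearch.connect(host='my-test-domain.us-east-1.es.amazonaws.com')  # hypothetical endpoint
df = wr.opensearch.search(
    client,
    index='my-index',
    search_body={'query': {'match_all': {}}},
    is_scroll=True,  # paginate via scan() rather than a single search() call
)
print(len(df))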
From e01b1a0f593a764b684bf88732ac9f1a7bf5d826 Mon Sep 17 00:00:00 2001
From: Assaf Mentzer
Date: Fri, 17 Sep 2021 13:50:51 -0400
Subject: [PATCH 12/41] [skip ci] add search_by_sql

---
 awswrangler/opensearch/_read.py | 36 ++++++++++++++++++++++++++++++---
 tests/test_opensearch.py        | 18 +++++++++++++++++
 2 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/awswrangler/opensearch/_read.py b/awswrangler/opensearch/_read.py
index 0f82f5964..80ec509d5 100644
--- a/awswrangler/opensearch/_read.py
+++ b/awswrangler/opensearch/_read.py
@@ -4,6 +4,7 @@

 from typing import Any, Dict, Optional
 from elasticsearch import Elasticsearch
 from elasticsearch.helpers import scan
+from awswrangler.opensearch._utils import _get_distribution
 import pandas as pd
@@ -86,7 +87,8 @@

 def search_by_sql(
     client: Elasticsearch,
-    sql_query: str
+    sql_query: str,
+    **kwargs
 ) -> DataFrame:
     """Returns results matching [SQL query](https://opensearch.org/docs/search-plugins/sql/index/) as pandas dataframe

     Parameters
     ----------
     client : Elasticsearch
         instance of elasticsearch.Elasticsearch to use.
     sql_query : str
         SQL query
+    **kwargs :
+        KEYWORD arguments forwarded to request url (e.g.: filter_path, etc.)

     Returns
     -------
...
     """
-
-    # TODO: write logic
-    pass
+
+    # can be used if not passing format
+    def _sql_response_to_docs(response: Dict[str, Any]):
+        header = list(map(lambda x: x['name'], response.get('schema', [])))
+        for datarow in response.get('datarows', []):
+            yield dict(zip(header, datarow))
+
+    if _get_distribution(client) == 'opensearch':
+        url = '/_plugins/_sql'
+    else:
+        url = '/_opendistro/_sql'
+
+    kwargs['format'] = 'json'
+    body = {'query': sql_query}
+    for size_att in ['size', 'fetch_size']:
+        if size_att in kwargs:
+            body['fetch_size'] = kwargs[size_att]
+            del kwargs[size_att]  # unrecognized parameter
+    response = client.transport.perform_request(
+        "POST",
+        url,
+        headers={'Content-Type': 'application/json'},
+        body=body,
+        params=kwargs
+    )
+
+    df = Select.from_dict(response).to_pandas()
+    return df
diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py
index 6152da7d8..28fdf5c5c 100644
--- a/tests/test_opensearch.py
+++ b/tests/test_opensearch.py
@@ -186,6 +186,24 @@ def test_search_scroll():
     print(df.to_string())


+def test_search_sql():
+    index = 'test_search_sql'
+    client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN)
+    response = wr.opensearch.index_documents(client,
+                                             documents=inspections_documents,
+                                             index=index,
+                                             id_keys=['inspection_id'],
+                                             refresh='wait_for'
+                                             )
+    df = wr.opensearch.search_by_sql(
+        client,
+        sql_query=f'select * from {index}'
+    )
+
+    print('')
+    print(df.to_string())
+
+
 def test_index_json_local():

From 1e1fe3799a8472172746c9ee061f0af33133f0c4 Mon Sep 17 00:00:00 2001
From: Assaf Mentzer
Date: Mon, 27 Sep 2021 23:41:50 -0400
Subject: [PATCH 13/41] [skip ci] opensearch test infra

---
 test_infra/app.py                       |   9 +
 test_infra/poetry.lock                  | 670 +++++++++++++++---------
 test_infra/pyproject.toml               |   1 +
 test_infra/scripts/delete-opensearch.sh |   6 +
 test_infra/scripts/deploy-opensearch.sh |   7 +
 test_infra/stacks/opensearch_stack.py   | 113 ++++
 tests/_utils.py                         |   2 +-
 tests/test_opensearch.py                | 148 ++++--
 8 files changed, 657 insertions(+), 299 deletions(-)
 create mode 100755 test_infra/scripts/delete-opensearch.sh
 create mode 100755 test_infra/scripts/deploy-opensearch.sh
 create mode 100644 test_infra/stacks/opensearch_stack.py

diff --git a/test_infra/app.py b/test_infra/app.py
index 4e27aa261..b14c1fc81 100644
--- a/test_infra/app.py
+++ b/test_infra/app.py
@@ -2,6 +2,7 @@

 from aws_cdk import core as cdk
 from stacks.base_stack import BaseStack
 from stacks.databases_stack import DatabasesStack
+from stacks.opensearch_stack import OpenSearchStack

 app = cdk.App()
@@ -14,4 +15,12 @@

     base.get_key,
 )

+OpenSearchStack(
+    app,
+    "aws-data-wrangler-opensearch",
+    base.get_vpc,
+    base.get_bucket,
+    base.get_key,
+)
+
 app.synth()
diff --git a/test_infra/poetry.lock b/test_infra/poetry.lock
index f68d38031..c4e5df69b 100644
--- a/test_infra/poetry.lock
+++ b/test_infra/poetry.lock
@@ -1,496 +1,638 @@

 [[package]]
 name = "attrs"
-version = "20.3.0"
+version = "21.2.0"
 description = "Classes Without Boilerplate"
 category = "main"
 optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"

 [package.extras]
-dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface", "furo", "sphinx", "pre-commit"]
-docs = ["furo", "sphinx", "zope.interface"]
-tests = ["coverage[toml] (>=5.0.2)", "hypothesis",
"pympler", "pytest (>=4.3.0)", "six", "zope.interface"] -tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six"] +dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit"] +docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] +tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface"] +tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins"] [[package]] name = "aws-cdk.assets" -version = "1.115.0" +version = "1.124.0" description = "This module is deprecated. All types are now available under the core module" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-applicationautoscaling" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::ApplicationAutoScaling" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-autoscaling-common" = "1.115.0" -"aws-cdk.aws-cloudwatch" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-autoscaling-common" = "1.124.0" +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-autoscaling-common" -version = "1.115.0" +version = "1.124.0" description = "Common implementation package for @aws-cdk/aws-autoscaling and @aws-cdk/aws-applicationautoscaling" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" +publication = ">=0.0.3" + +[[package]] +name = "aws-cdk.aws-certificatemanager" +version = "1.124.0" +description = "The CDK Construct Library for AWS::CertificateManager" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-lambda" = "1.124.0" +"aws-cdk.aws-route53" = "1.124.0" +"aws-cdk.core" = "1.124.0" +constructs = ">=3.3.69,<4.0.0" +jsii = ">=1.34.0,<2.0.0" +publication = ">=0.0.3" + +[[package]] +name = "aws-cdk.aws-cloudformation" +version = "1.124.0" +description = "The CDK Construct Library for AWS::CloudFormation" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-lambda" = "1.124.0" +"aws-cdk.aws-s3" = "1.124.0" +"aws-cdk.aws-sns" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" +constructs = ">=3.3.69,<4.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-cloudwatch" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::CloudWatch" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] 
-"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-codeguruprofiler" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::CodeGuruProfiler" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" +publication = ">=0.0.3" + +[[package]] +name = "aws-cdk.aws-codestarnotifications" +version = "1.124.0" +description = "The CDK Construct Library for AWS::CodeStarNotifications" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +"aws-cdk.core" = "1.124.0" +constructs = ">=3.3.69,<4.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-ec2" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::EC2" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-cloudwatch" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.aws-logs" = "1.115.0" -"aws-cdk.aws-s3" = "1.115.0" -"aws-cdk.aws-s3-assets" = "1.115.0" -"aws-cdk.aws-ssm" = "1.115.0" -"aws-cdk.cloud-assembly-schema" = "1.115.0" -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" -"aws-cdk.region-info" = "1.115.0" +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-logs" = "1.124.0" +"aws-cdk.aws-s3" = "1.124.0" +"aws-cdk.aws-s3-assets" = "1.124.0" +"aws-cdk.aws-ssm" = "1.124.0" +"aws-cdk.cloud-assembly-schema" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" +"aws-cdk.region-info" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-ecr" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::ECR" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-events" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-events" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-ecr-assets" -version = "1.115.0" +version = "1.124.0" description = "Docker image assets deployed to ECR" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.assets" = "1.115.0" -"aws-cdk.aws-ecr" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-s3" = "1.115.0" -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" +"aws-cdk.assets" = "1.124.0" +"aws-cdk.aws-ecr" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-s3" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-efs" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::EFS" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-ec2" = 
"1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.cloud-assembly-schema" = "1.115.0" -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.cloud-assembly-schema" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-events" -version = "1.115.0" +version = "1.124.0" description = "Amazon EventBridge Construct Library" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-glue" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::Glue" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-ec2" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.aws-s3" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.assets" = "1.124.0" +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-events" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-logs" = "1.124.0" +"aws-cdk.aws-s3" = "1.124.0" +"aws-cdk.aws-s3-assets" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-iam" -version = "1.115.0" +version = "1.124.0" description = "CDK routines for easily assigning correct and minimal IAM permissions" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.core" = "1.115.0" -"aws-cdk.region-info" = "1.115.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.region-info" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-kms" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::KMS" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.cloud-assembly-schema" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-lambda" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::Lambda" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-applicationautoscaling" = "1.115.0" -"aws-cdk.aws-cloudwatch" = "1.115.0" -"aws-cdk.aws-codeguruprofiler" = "1.115.0" -"aws-cdk.aws-ec2" = "1.115.0" -"aws-cdk.aws-ecr" = "1.115.0" -"aws-cdk.aws-ecr-assets" = "1.115.0" -"aws-cdk.aws-efs" = "1.115.0" -"aws-cdk.aws-events" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.aws-logs" = "1.115.0" -"aws-cdk.aws-s3" = "1.115.0" -"aws-cdk.aws-s3-assets" = "1.115.0" -"aws-cdk.aws-signer" = "1.115.0" -"aws-cdk.aws-sqs" = "1.115.0" -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = 
"1.115.0" +"aws-cdk.aws-applicationautoscaling" = "1.124.0" +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-codeguruprofiler" = "1.124.0" +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-ecr" = "1.124.0" +"aws-cdk.aws-ecr-assets" = "1.124.0" +"aws-cdk.aws-efs" = "1.124.0" +"aws-cdk.aws-events" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-logs" = "1.124.0" +"aws-cdk.aws-s3" = "1.124.0" +"aws-cdk.aws-s3-assets" = "1.124.0" +"aws-cdk.aws-signer" = "1.124.0" +"aws-cdk.aws-sqs" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" +"aws-cdk.region-info" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-logs" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::Logs" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-cloudwatch" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.aws-s3-assets" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-s3-assets" = "1.124.0" +"aws-cdk.core" = "1.124.0" +constructs = ">=3.3.69,<4.0.0" +jsii = ">=1.34.0,<2.0.0" +publication = ">=0.0.3" + +[[package]] +name = "aws-cdk.aws-opensearchservice" +version = "1.124.0" +description = "The CDK Construct Library for AWS::OpenSearchService" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +"aws-cdk.aws-certificatemanager" = "1.124.0" +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-logs" = "1.124.0" +"aws-cdk.aws-route53" = "1.124.0" +"aws-cdk.aws-secretsmanager" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.custom-resources" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-rds" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::RDS" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-cloudwatch" = "1.115.0" -"aws-cdk.aws-ec2" = "1.115.0" -"aws-cdk.aws-events" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.aws-logs" = "1.115.0" -"aws-cdk.aws-s3" = "1.115.0" -"aws-cdk.aws-secretsmanager" = "1.115.0" -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-events" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-logs" = "1.124.0" +"aws-cdk.aws-s3" = "1.124.0" +"aws-cdk.aws-secretsmanager" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-redshift" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::Redshift" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-ec2" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.aws-s3" = "1.115.0" -"aws-cdk.aws-secretsmanager" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = 
"1.124.0" +"aws-cdk.aws-lambda" = "1.124.0" +"aws-cdk.aws-s3" = "1.124.0" +"aws-cdk.aws-secretsmanager" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.custom-resources" = "1.124.0" +constructs = ">=3.3.69,<4.0.0" +jsii = ">=1.34.0,<2.0.0" +publication = ">=0.0.3" + +[[package]] +name = "aws-cdk.aws-route53" +version = "1.124.0" +description = "The CDK Construct Library for AWS::Route53" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-logs" = "1.124.0" +"aws-cdk.cloud-assembly-schema" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.custom-resources" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-s3" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::S3" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-events" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" +"aws-cdk.aws-events" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-s3-assets" -version = "1.115.0" +version = "1.124.0" description = "Deploy local files and directories to S3" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.assets" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.aws-s3" = "1.115.0" -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" +"aws-cdk.assets" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-s3" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-sam" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for the AWS Serverless Application Model (SAM) resources" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.core" = "1.115.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-secretsmanager" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::SecretsManager" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-ec2" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.aws-lambda" = "1.115.0" -"aws-cdk.aws-sam" = "1.115.0" -"aws-cdk.core" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-lambda" = "1.124.0" +"aws-cdk.aws-sam" = "1.124.0" +"aws-cdk.core" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-signer" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::Signer" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] 
-"aws-cdk.core" = "1.115.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" +publication = ">=0.0.3" + +[[package]] +name = "aws-cdk.aws-sns" +version = "1.124.0" +description = "The CDK Construct Library for AWS::SNS" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-codestarnotifications" = "1.124.0" +"aws-cdk.aws-events" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.aws-sqs" = "1.124.0" +"aws-cdk.core" = "1.124.0" +constructs = ">=3.3.69,<4.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-sqs" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::SQS" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-cloudwatch" = "1.115.0" -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-cloudwatch" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.aws-ssm" -version = "1.115.0" +version = "1.124.0" description = "The CDK Construct Library for AWS::SSM" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.aws-iam" = "1.115.0" -"aws-cdk.aws-kms" = "1.115.0" -"aws-cdk.cloud-assembly-schema" = "1.115.0" -"aws-cdk.core" = "1.115.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-kms" = "1.124.0" +"aws-cdk.cloud-assembly-schema" = "1.124.0" +"aws-cdk.core" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.cloud-assembly-schema" -version = "1.115.0" +version = "1.124.0" description = "Cloud Assembly Schema" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.core" -version = "1.115.0" +version = "1.124.0" description = "AWS Cloud Development Kit Core Library" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.cloud-assembly-schema" = "1.115.0" -"aws-cdk.cx-api" = "1.115.0" -"aws-cdk.region-info" = "1.115.0" +"aws-cdk.cloud-assembly-schema" = "1.124.0" +"aws-cdk.cx-api" = "1.124.0" +"aws-cdk.region-info" = "1.124.0" constructs = ">=3.3.69,<4.0.0" -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" +publication = ">=0.0.3" + +[[package]] +name = "aws-cdk.custom-resources" +version = "1.124.0" +description = "Constructs for implementing CDK custom resources" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +"aws-cdk.aws-cloudformation" = "1.124.0" +"aws-cdk.aws-ec2" = "1.124.0" +"aws-cdk.aws-iam" = "1.124.0" +"aws-cdk.aws-lambda" = "1.124.0" +"aws-cdk.aws-logs" = "1.124.0" +"aws-cdk.aws-sns" = "1.124.0" +"aws-cdk.core" = "1.124.0" +constructs = ">=3.3.69,<4.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.cx-api" -version = "1.115.0" +version = "1.124.0" description = "Cloud executable protocol" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -"aws-cdk.cloud-assembly-schema" = "1.115.0" -jsii = ">=1.31.0,<2.0.0" +"aws-cdk.cloud-assembly-schema" = 
"1.124.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] name = "aws-cdk.region-info" -version = "1.115.0" +version = "1.124.0" description = "AWS region information, such as service principal names" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -jsii = ">=1.31.0,<2.0.0" +jsii = ">=1.34.0,<2.0.0" publication = ">=0.0.3" [[package]] @@ -509,14 +651,14 @@ dev = ["bumpversion", "wheel", "watchdog", "flake8", "tox", "coverage", "sphinx" [[package]] name = "cattrs" -version = "1.6.0" +version = "1.8.0" description = "Composable complex class support for attrs and dataclasses." category = "main" optional = false python-versions = ">=3.7,<4.0" [package.dependencies] -attrs = "*" +attrs = ">=20" [[package]] name = "constructs" @@ -547,17 +689,17 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes [[package]] name = "jsii" -version = "1.32.0" +version = "1.34.0" description = "Python client for jsii runtime" category = "main" optional = false python-versions = "~=3.6" [package.dependencies] -attrs = ">=20.1,<21.0" +attrs = ">=21.2,<22.0" cattrs = [ {version = ">=1.0.0,<1.1.0", markers = "python_version < \"3.7\""}, - {version = ">=1.6.0,<1.7.0", markers = "python_version >= \"3.7\""}, + {version = ">=1.8.0,<1.9.0", markers = "python_version >= \"3.7\""}, ] importlib-resources = {version = "*", markers = "python_version < \"3.7\""} python-dateutil = "*" @@ -613,130 +755,158 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes [metadata] lock-version = "1.1" python-versions = ">=3.6.2, <3.10" -content-hash = "6f8430d31b5e3d08bb0393b4c93ca223cc9d49b55bb3045f95326770d74347ca" +content-hash = "7fe703d54794d69aab0dd6ad5b4017c43defbff76ed9a3fe10e243c422adfea6" [metadata.files] attrs = [ - {file = "attrs-20.3.0-py2.py3-none-any.whl", hash = "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6"}, - {file = "attrs-20.3.0.tar.gz", hash = "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700"}, + {file = "attrs-21.2.0-py2.py3-none-any.whl", hash = "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1"}, + {file = "attrs-21.2.0.tar.gz", hash = "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb"}, ] "aws-cdk.assets" = [ - {file = "aws-cdk.assets-1.115.0.tar.gz", hash = "sha256:e3a569f900451f2f8429a2ad7cd059712f2903d24cbcaa023911f46362496d2d"}, - {file = "aws_cdk.assets-1.115.0-py3-none-any.whl", hash = "sha256:d7f62fdaf500980cbcb0cab82cd08cb7334683428cfb3c67c68f72371e29109f"}, + {file = "aws-cdk.assets-1.124.0.tar.gz", hash = "sha256:8097177806b29824a69bbdb5df9ec74f7b360708b51ed860613d38e30414054a"}, + {file = "aws_cdk.assets-1.124.0-py3-none-any.whl", hash = "sha256:c94b63e36c094111c6a9abb2a9d6c694f3e123034cf5dc23e5293fdc32c44fb3"}, ] "aws-cdk.aws-applicationautoscaling" = [ - {file = "aws-cdk.aws-applicationautoscaling-1.115.0.tar.gz", hash = "sha256:e174b3247252bfec419389b896267516d2f874ec56456880116f79204ae9e3e5"}, - {file = "aws_cdk.aws_applicationautoscaling-1.115.0-py3-none-any.whl", hash = "sha256:45eff7fb107924b6ade243e88edae49f14a599ff3afcaf40a73969c45de733b5"}, + {file = "aws-cdk.aws-applicationautoscaling-1.124.0.tar.gz", hash = "sha256:c3bc89c2754b7ce029c667be9ab1633884bf574d33773a1dc07a3cff1b698670"}, + {file = "aws_cdk.aws_applicationautoscaling-1.124.0-py3-none-any.whl", hash = "sha256:d0dcc91b3de13ad46b874813877af3746adec3ad9f7380b2408a14cdd848b65c"}, ] "aws-cdk.aws-autoscaling-common" = [ - {file = 
"aws-cdk.aws-autoscaling-common-1.115.0.tar.gz", hash = "sha256:b87c84d3e558b20e3bea515d89cb59d633d71e2c8a6e4e859a691f3c06d45c10"}, - {file = "aws_cdk.aws_autoscaling_common-1.115.0-py3-none-any.whl", hash = "sha256:bc0e56fe4fedd6e5a0d094845c4e1b2681bf60dfb72f2062392ef7edd5b157bd"}, + {file = "aws-cdk.aws-autoscaling-common-1.124.0.tar.gz", hash = "sha256:03f57fcd34d9e370c0929de63c674bdbf2a8fbe2efed40942e0e2bff1ed1d436"}, + {file = "aws_cdk.aws_autoscaling_common-1.124.0-py3-none-any.whl", hash = "sha256:1969320c12bf4107346233b3310464c1e752b65a6577c865abb809711cec2c1f"}, +] +"aws-cdk.aws-certificatemanager" = [ + {file = "aws-cdk.aws-certificatemanager-1.124.0.tar.gz", hash = "sha256:291e7c29aa406619276dc141a3827b0af15c9a997b6e7dc1a8c59bbfb3aa7df7"}, + {file = "aws_cdk.aws_certificatemanager-1.124.0-py3-none-any.whl", hash = "sha256:23071000fe931dd817638b059991872fe93a91a1c1d33750f080c536e9aaf302"}, +] +"aws-cdk.aws-cloudformation" = [ + {file = "aws-cdk.aws-cloudformation-1.124.0.tar.gz", hash = "sha256:c38efe614113c3bdcb964f6c20742994154392bc78e82c34a299d0f1b26a7c65"}, + {file = "aws_cdk.aws_cloudformation-1.124.0-py3-none-any.whl", hash = "sha256:9b530359f567555b83dfbb99f7112fdb2ad893176032ff542ce09f7454ce5107"}, ] "aws-cdk.aws-cloudwatch" = [ - {file = "aws-cdk.aws-cloudwatch-1.115.0.tar.gz", hash = "sha256:adb27916047303bf5748d503dc608041d30ea002b47c4e2c370d2084c1bec8c4"}, - {file = "aws_cdk.aws_cloudwatch-1.115.0-py3-none-any.whl", hash = "sha256:2b6b5e954f0b2a629d977cb6db93ec38e2c3c6dde43d88369dbc7a64c92d1ce1"}, + {file = "aws-cdk.aws-cloudwatch-1.124.0.tar.gz", hash = "sha256:221734f8b6f940068714fe00fd68a8a32d767c713b2adb874365482836248f7f"}, + {file = "aws_cdk.aws_cloudwatch-1.124.0-py3-none-any.whl", hash = "sha256:a9a4abf58e31cb53872601296b41cf8e8d5106807a5775d19a6ac05fbe34bef0"}, ] "aws-cdk.aws-codeguruprofiler" = [ - {file = "aws-cdk.aws-codeguruprofiler-1.115.0.tar.gz", hash = "sha256:bd8954511616b1ae8e6bd88122de5cb94c7d16b79f051452b490af9ec729124d"}, - {file = "aws_cdk.aws_codeguruprofiler-1.115.0-py3-none-any.whl", hash = "sha256:48d6a7ea1a372e3e1dbdb0307c7665ba486ef58b80d1d2ebb56cabb03b40af80"}, + {file = "aws-cdk.aws-codeguruprofiler-1.124.0.tar.gz", hash = "sha256:e37cd801e5b7fa93a0dba84effc36cd94f090b83988c4f165815ba585f7ca866"}, + {file = "aws_cdk.aws_codeguruprofiler-1.124.0-py3-none-any.whl", hash = "sha256:4d4bd49ea2415d9daf7c3c57403060802e5f523bd476a276f1e00a3e3d73c15d"}, +] +"aws-cdk.aws-codestarnotifications" = [ + {file = "aws-cdk.aws-codestarnotifications-1.124.0.tar.gz", hash = "sha256:478486be7e24e455c1fd8a54489de491005997b6ebdc06212a6231e89471414a"}, + {file = "aws_cdk.aws_codestarnotifications-1.124.0-py3-none-any.whl", hash = "sha256:de73fbcceba282ddf3caf5e74b188e4685108cec845f573986ea3fec1c98beba"}, ] "aws-cdk.aws-ec2" = [ - {file = "aws-cdk.aws-ec2-1.115.0.tar.gz", hash = "sha256:e819f98e07d3ee24182f23d435bf164ca7bdfdd42e72305d975b2c75a5a57138"}, - {file = "aws_cdk.aws_ec2-1.115.0-py3-none-any.whl", hash = "sha256:0475af1a07e514136004870c590dd5b187dd4588eb291da4662ed2d7cf5956c7"}, + {file = "aws-cdk.aws-ec2-1.124.0.tar.gz", hash = "sha256:f7515734cac0ef8eeaa003bef85364c878fad4a90876de313d156cc863199811"}, + {file = "aws_cdk.aws_ec2-1.124.0-py3-none-any.whl", hash = "sha256:d000d22d87d887dfbc61b82be897234fc58f421b2fbbbc29f002b683b4fdac4f"}, ] "aws-cdk.aws-ecr" = [ - {file = "aws-cdk.aws-ecr-1.115.0.tar.gz", hash = "sha256:3083470a95283a95275e1f2ad30868f3591d0a5bf432cf4bab360dabe4cb2e29"}, - {file = "aws_cdk.aws_ecr-1.115.0-py3-none-any.whl", hash = 
"sha256:695842b3b892b404c3219d8b44b9ad7a8bf1fd1957abb97c618dba47e050108b"}, + {file = "aws-cdk.aws-ecr-1.124.0.tar.gz", hash = "sha256:cbf940fbb76eb189143df45f67115673faf10a4b8e7f571660822604c9016aad"}, + {file = "aws_cdk.aws_ecr-1.124.0-py3-none-any.whl", hash = "sha256:1661c6f8fd618ac75da7cdefd36adda747218e4fe27faa44b5df62ecabd0b3f3"}, ] "aws-cdk.aws-ecr-assets" = [ - {file = "aws-cdk.aws-ecr-assets-1.115.0.tar.gz", hash = "sha256:5450bbcebb89eff84327246c6049a90adefe73ed194bd62778ffeee6facf9042"}, - {file = "aws_cdk.aws_ecr_assets-1.115.0-py3-none-any.whl", hash = "sha256:8e7e5b2351370b795b12abd0812a3ace241cc46df8d67aecb92410de2bfd7318"}, + {file = "aws-cdk.aws-ecr-assets-1.124.0.tar.gz", hash = "sha256:b2401b111474413436e664c1652d02d6e053ca946cbbe224a4f9c3c6220005df"}, + {file = "aws_cdk.aws_ecr_assets-1.124.0-py3-none-any.whl", hash = "sha256:7dc6b6f262baffa37df3ed898d8ae74ef2384793be822a91b91159cb512183ff"}, ] "aws-cdk.aws-efs" = [ - {file = "aws-cdk.aws-efs-1.115.0.tar.gz", hash = "sha256:eb96d01635283dbee1101fe57e0a19310974c8de02f75d9042adbab44139fe65"}, - {file = "aws_cdk.aws_efs-1.115.0-py3-none-any.whl", hash = "sha256:8e9e3f0f837e1ff3cfe96da5d700095f24d132c11cc7544f7a9f20024fa27372"}, + {file = "aws-cdk.aws-efs-1.124.0.tar.gz", hash = "sha256:90aaccea5ff55ae4a3045540f78e007c048709e142d77947aa15ad655ed4c011"}, + {file = "aws_cdk.aws_efs-1.124.0-py3-none-any.whl", hash = "sha256:282db0bd269535fb19f0101d4fa6b9cb7cf7dcddf2eaf5d04d7f03fef156c9d0"}, ] "aws-cdk.aws-events" = [ - {file = "aws-cdk.aws-events-1.115.0.tar.gz", hash = "sha256:4ce7f0e894c61849e8157a0170cb74ec5223d18dc613075912f2ef560974856b"}, - {file = "aws_cdk.aws_events-1.115.0-py3-none-any.whl", hash = "sha256:a817f0f46c027163a30eb5bab254540e00f5e5285bb1e8678dfd724f8f1187c0"}, + {file = "aws-cdk.aws-events-1.124.0.tar.gz", hash = "sha256:0b6b5ffca233c0b5d7abaf011072ca896463ce391242ffdf7bf4def28dec8213"}, + {file = "aws_cdk.aws_events-1.124.0-py3-none-any.whl", hash = "sha256:92ba680941365de0f90ad7881b8c2e787c50b85a69bc32e82b4578a3276f810f"}, ] "aws-cdk.aws-glue" = [ - {file = "aws-cdk.aws-glue-1.115.0.tar.gz", hash = "sha256:a85d344e61cfb3e0953665bcd85fd4b7ac282417fe7099e2c54cc393f62bfa99"}, - {file = "aws_cdk.aws_glue-1.115.0-py3-none-any.whl", hash = "sha256:ca2780bf366ab2ba74adb98b6a49c95ee6e5dbde2bc5758657cb5d4197c996ce"}, + {file = "aws-cdk.aws-glue-1.124.0.tar.gz", hash = "sha256:b43f747a2b8480ca848f7ab27b1dd0c7e352c9602fdb039cfc78f5013dbef450"}, + {file = "aws_cdk.aws_glue-1.124.0-py3-none-any.whl", hash = "sha256:d90bc85ae0d6b03536879d6fa72cdc49cfe1d58451b9e0065786b682dc2f9422"}, ] "aws-cdk.aws-iam" = [ - {file = "aws-cdk.aws-iam-1.115.0.tar.gz", hash = "sha256:fe4e3138d6544755cbeb2400fd770b583b01906443648a4588085de2e781707f"}, - {file = "aws_cdk.aws_iam-1.115.0-py3-none-any.whl", hash = "sha256:7ba923894c6ecce33147527dccbf90fdaecc7a5561b2ca9398623f1f063f898c"}, + {file = "aws-cdk.aws-iam-1.124.0.tar.gz", hash = "sha256:9d779439048832c6f4d5722196a9490d80bb649e56bb4dadc554ea3ae940f797"}, + {file = "aws_cdk.aws_iam-1.124.0-py3-none-any.whl", hash = "sha256:249fc537532f73c3cd3f59dc635be58535d9e9f9418062214eb664e14b59a6be"}, ] "aws-cdk.aws-kms" = [ - {file = "aws-cdk.aws-kms-1.115.0.tar.gz", hash = "sha256:1d1feca56bc4c2de722f59a07ee8dc36b6d7a31d70ffe32de5f76c099b2b6322"}, - {file = "aws_cdk.aws_kms-1.115.0-py3-none-any.whl", hash = "sha256:c692b0cebe2b0106ddc0ec3946a895941176b35411d46b27ae9bfb06cdaa9d6d"}, + {file = "aws-cdk.aws-kms-1.124.0.tar.gz", hash = 
"sha256:205e79bc8f8e009bd1b5df236f0336e977eb141c70575a42080e36829358215f"}, + {file = "aws_cdk.aws_kms-1.124.0-py3-none-any.whl", hash = "sha256:91294f10f02000743eef712da5ba7ea2749b43e4a0ad7d4715c9c95b6a472c10"}, ] "aws-cdk.aws-lambda" = [ - {file = "aws-cdk.aws-lambda-1.115.0.tar.gz", hash = "sha256:11eec3652671f37d261f991eaf963726fed281c5aafe77e9f83afab899398892"}, - {file = "aws_cdk.aws_lambda-1.115.0-py3-none-any.whl", hash = "sha256:65000012469a64096d25614c23e22da74a3d15234925cf44b29fd3d63d21b993"}, + {file = "aws-cdk.aws-lambda-1.124.0.tar.gz", hash = "sha256:801552637c408a693a7b13967da4ec4e8a623f22b90fb0fdfb845c23765e4e29"}, + {file = "aws_cdk.aws_lambda-1.124.0-py3-none-any.whl", hash = "sha256:50d774d026a8a0ca5089df5c8b2c7cc2ef74db2a4b20c5d049210b154d3af03d"}, ] "aws-cdk.aws-logs" = [ - {file = "aws-cdk.aws-logs-1.115.0.tar.gz", hash = "sha256:de30016914a17ca59d55f36029aa10fdc800f8fa69f4a5de822898aebbb29a78"}, - {file = "aws_cdk.aws_logs-1.115.0-py3-none-any.whl", hash = "sha256:8c6adcf54e066a71a6a7031a8592f52f09a01ca0d6a6d1f51080f9996ad7ac52"}, + {file = "aws-cdk.aws-logs-1.124.0.tar.gz", hash = "sha256:2fba565fc4f12b397bd9df1cd9964c1b35ce1ca65cd618407b6b1777bc43d292"}, + {file = "aws_cdk.aws_logs-1.124.0-py3-none-any.whl", hash = "sha256:1f4b1ff436f2d0663e6c76264d7d6ee9dd0d90f3d9c09e5e93f1b0f31abbc379"}, +] +"aws-cdk.aws-opensearchservice" = [ + {file = "aws-cdk.aws-opensearchservice-1.124.0.tar.gz", hash = "sha256:d1bd4ca9ac9cf38b7c04a5e1e63eefe30e6e5e40adc0134e61d468694c71c4b1"}, + {file = "aws_cdk.aws_opensearchservice-1.124.0-py3-none-any.whl", hash = "sha256:170417a55884ac8f26b0ae4cc59c085c8c2a0607b18ca906c1ee4d366b737d85"}, ] "aws-cdk.aws-rds" = [ - {file = "aws-cdk.aws-rds-1.115.0.tar.gz", hash = "sha256:c562843534494ef283474ebd7bba4e44e0b7cb063c0121e20f08ba49749a2a60"}, - {file = "aws_cdk.aws_rds-1.115.0-py3-none-any.whl", hash = "sha256:7c00e329b6455b4279ad9880c2e033509b27be63b31626413f28558ae8d24a7f"}, + {file = "aws-cdk.aws-rds-1.124.0.tar.gz", hash = "sha256:20057fc95cda55fc504987dc0395062836dacc72efce2c86051677a1bb6d8d43"}, + {file = "aws_cdk.aws_rds-1.124.0-py3-none-any.whl", hash = "sha256:bd66c0f76548cee6fb1f100f0e36ab9d5933ef70121b072ae05b3dd26e408ff3"}, ] "aws-cdk.aws-redshift" = [ - {file = "aws-cdk.aws-redshift-1.115.0.tar.gz", hash = "sha256:758e6e940e7a432d46d144ebf8002af51fbe98d452221725510f01488847f9a3"}, - {file = "aws_cdk.aws_redshift-1.115.0-py3-none-any.whl", hash = "sha256:311dcb36814434214917ad707689a210016ce1d6286c69d44ec01f5df27a3c7d"}, + {file = "aws-cdk.aws-redshift-1.124.0.tar.gz", hash = "sha256:70cb4700cdfecad592524cd017a4a859b3d4ae407b3d2fcf329022c1d2faf863"}, + {file = "aws_cdk.aws_redshift-1.124.0-py3-none-any.whl", hash = "sha256:4df5c19f74194fb9bd7a56e5b89b9312c35b681a322b0c1b0e248874f628ddc4"}, +] +"aws-cdk.aws-route53" = [ + {file = "aws-cdk.aws-route53-1.124.0.tar.gz", hash = "sha256:c5137b3c5211632b931d7b79234aec6006f72701c68477086e70c213320639ef"}, + {file = "aws_cdk.aws_route53-1.124.0-py3-none-any.whl", hash = "sha256:97fe84e53c26c1a713a3b57341c2ecf488db56cc0b6127975656c53206ccd471"}, ] "aws-cdk.aws-s3" = [ - {file = "aws-cdk.aws-s3-1.115.0.tar.gz", hash = "sha256:73d72900194b944435056faf42c0df21ca7f6a0f941e0bc8d5cdf3de4c0261e9"}, - {file = "aws_cdk.aws_s3-1.115.0-py3-none-any.whl", hash = "sha256:81f85f3c107f05012a351260640a1bb1911106addbd26f2dd2c22d8c44122053"}, + {file = "aws-cdk.aws-s3-1.124.0.tar.gz", hash = "sha256:3047305a4e013cb796532027c14908003ffe7af95fe8e214e3470a32a11c09e6"}, + {file = 
"aws_cdk.aws_s3-1.124.0-py3-none-any.whl", hash = "sha256:0b08821e3b79c26110068f54aabdb938da55b562dcf2a28a7171d930334ce71a"}, ] "aws-cdk.aws-s3-assets" = [ - {file = "aws-cdk.aws-s3-assets-1.115.0.tar.gz", hash = "sha256:4aa793512b08d73f0bacb71f72f607a510672d077216cdd1ac307c65bd0751ae"}, - {file = "aws_cdk.aws_s3_assets-1.115.0-py3-none-any.whl", hash = "sha256:0bb1eea914908a5fc69a505b118e89f7d3097bce309126167b738a0aefd98ec6"}, + {file = "aws-cdk.aws-s3-assets-1.124.0.tar.gz", hash = "sha256:568d4c598319e3bf1869536be0586b1004d3c43c2133ba94bf9cda4ad4ae5d5d"}, + {file = "aws_cdk.aws_s3_assets-1.124.0-py3-none-any.whl", hash = "sha256:125c5e3786f2c233512374080553b2a7592efa6a53203764979a1bb987c47338"}, ] "aws-cdk.aws-sam" = [ - {file = "aws-cdk.aws-sam-1.115.0.tar.gz", hash = "sha256:babca8a6fbf68a32ebf6f1fd54f6a7bc506d60dae007fd6e4b06f1637edd42fd"}, - {file = "aws_cdk.aws_sam-1.115.0-py3-none-any.whl", hash = "sha256:ece50ab527eb1e5f84f6de2ad503e7cd61a2351dfcb6446274f8099ffabfcfc5"}, + {file = "aws-cdk.aws-sam-1.124.0.tar.gz", hash = "sha256:39db01a4d88fd05c57dbc4f0c76c2471eab3e75753febc30f2847c546fa8292b"}, + {file = "aws_cdk.aws_sam-1.124.0-py3-none-any.whl", hash = "sha256:b1ca75d2fb13898ed66cd4ee364cfa0b4f0924ab4583994ec4a7200d10c8c71b"}, ] "aws-cdk.aws-secretsmanager" = [ - {file = "aws-cdk.aws-secretsmanager-1.115.0.tar.gz", hash = "sha256:6de8204e4bbcbe8df8852646933c1d8d8cb1332374baee9fe780bd2b413e2423"}, - {file = "aws_cdk.aws_secretsmanager-1.115.0-py3-none-any.whl", hash = "sha256:0acf55659f67ac43c69be9a17e40e382d6122abc8055f092332723e07db15fd9"}, + {file = "aws-cdk.aws-secretsmanager-1.124.0.tar.gz", hash = "sha256:76d3ded9f20d29520d4e54e15c335718cac4f938aacb4827a2a9f98af417576f"}, + {file = "aws_cdk.aws_secretsmanager-1.124.0-py3-none-any.whl", hash = "sha256:0b6ae44966600943eb66fc48a93a0ae2bac60c8d6a5ff9c687ad9675b9f2bc5f"}, ] "aws-cdk.aws-signer" = [ - {file = "aws-cdk.aws-signer-1.115.0.tar.gz", hash = "sha256:9050e46e059edcde6b8e1d80b0d792eb2b4ad36cc00ce0b284d04a15b019b216"}, - {file = "aws_cdk.aws_signer-1.115.0-py3-none-any.whl", hash = "sha256:3b4b920dd5c8873bb0b60c0d2ae340fad434e7f011296f465d482afc094b25da"}, + {file = "aws-cdk.aws-signer-1.124.0.tar.gz", hash = "sha256:96dd4ae63b43c7c12fde59f7ebbbea1895964a5f08c6e2ca4a2a1062abcc2399"}, + {file = "aws_cdk.aws_signer-1.124.0-py3-none-any.whl", hash = "sha256:2fe614e6ce1ea6259d60f3adced41eaefdeace0cf77d961b5fcef815e1f82428"}, +] +"aws-cdk.aws-sns" = [ + {file = "aws-cdk.aws-sns-1.124.0.tar.gz", hash = "sha256:21e838c52cdd9bdcd98fc0fbe16ffad2bf10ba6bf31c5bfcdd9f49a8b3479d0c"}, + {file = "aws_cdk.aws_sns-1.124.0-py3-none-any.whl", hash = "sha256:cb3820fd79643d1c5fb0b69f2b4755900dd16756af0f4c36706d68220a845d8b"}, ] "aws-cdk.aws-sqs" = [ - {file = "aws-cdk.aws-sqs-1.115.0.tar.gz", hash = "sha256:b24e03f0027fd99c6cdfe604e3a2b3d0d203d616dffafc74f74f6715083e2b08"}, - {file = "aws_cdk.aws_sqs-1.115.0-py3-none-any.whl", hash = "sha256:cda589452cb4a6db584050e50f14fbe11757fb0b3aff63f50ae663fad5b7bf27"}, + {file = "aws-cdk.aws-sqs-1.124.0.tar.gz", hash = "sha256:ffed4754784de29473f554e450c6ec1b96c7508a2706406fe8d6442f2a31c58c"}, + {file = "aws_cdk.aws_sqs-1.124.0-py3-none-any.whl", hash = "sha256:382721ca5d82dce9ec2625e5bae26132151748ee60e1269a0aa91cfd03227ee7"}, ] "aws-cdk.aws-ssm" = [ - {file = "aws-cdk.aws-ssm-1.115.0.tar.gz", hash = "sha256:960330865ee74485cab510ba1cac5d8d4578e777f1a421b14e8a20895bbe5ac5"}, - {file = "aws_cdk.aws_ssm-1.115.0-py3-none-any.whl", hash = 
"sha256:4431c43667b57fe2883a9ef022b277cbd3b62f6ab13cb0b1221513f7f76f2aac"}, + {file = "aws-cdk.aws-ssm-1.124.0.tar.gz", hash = "sha256:bcfc99a5cdf23849503c72d93b9e5734d11976453004f13ebca2a66aeb3df10c"}, + {file = "aws_cdk.aws_ssm-1.124.0-py3-none-any.whl", hash = "sha256:4d7335c2ce0200c1ed347422139c9d9b07c71297253ba911470114277996cc76"}, ] "aws-cdk.cloud-assembly-schema" = [ - {file = "aws-cdk.cloud-assembly-schema-1.115.0.tar.gz", hash = "sha256:d565a8418e0cc05d3471dd48424477528d72bdd7d17adc9a049068559666a3ae"}, - {file = "aws_cdk.cloud_assembly_schema-1.115.0-py3-none-any.whl", hash = "sha256:0686e6f7e5da48dbd2ff724953d51eb0495b6772bdb17400024bb42e6fe05baf"}, + {file = "aws-cdk.cloud-assembly-schema-1.124.0.tar.gz", hash = "sha256:d2989a6742ad988fa0f7085ab67fb7ced14f4c3b1a98cc0bf4a0ea1a9358667c"}, + {file = "aws_cdk.cloud_assembly_schema-1.124.0-py3-none-any.whl", hash = "sha256:77d3f63629b7213c639ffd4c46eb63ce9dd048e9a91a045afa72dcce9576ee6b"}, ] "aws-cdk.core" = [ - {file = "aws-cdk.core-1.115.0.tar.gz", hash = "sha256:42a691cc183219ce76eb58e17507edf768a0f5eca0ea98661b4b1f16f178b90d"}, - {file = "aws_cdk.core-1.115.0-py3-none-any.whl", hash = "sha256:93a8e3d87f79af75866bf3f1cfc702dd5664526ec0f70a1c5f7ade82cb1536b1"}, + {file = "aws-cdk.core-1.124.0.tar.gz", hash = "sha256:bbdc1cf5affc34d0caa549771dc6b41ce467744f8ca727b215f0d89b853f4f0c"}, + {file = "aws_cdk.core-1.124.0-py3-none-any.whl", hash = "sha256:56c4549161029c707aa527882e4741fca1ef4c46f63a6417e56e968710cfba7c"}, +] +"aws-cdk.custom-resources" = [ + {file = "aws-cdk.custom-resources-1.124.0.tar.gz", hash = "sha256:d2be1a1636b65e275521970b9c9accd02718f678ebb074a580b15b695e4b60d5"}, + {file = "aws_cdk.custom_resources-1.124.0-py3-none-any.whl", hash = "sha256:6c9abcc046a92dc6845c8a81e33ac727da95e0c0d95b3fba0d433de7dae10a61"}, ] "aws-cdk.cx-api" = [ - {file = "aws-cdk.cx-api-1.115.0.tar.gz", hash = "sha256:10251ef8deaf7acfb7f7356e07c53cd86bbd8725631795e1ce8f8891bcaffad0"}, - {file = "aws_cdk.cx_api-1.115.0-py3-none-any.whl", hash = "sha256:6c03bc14f8d645e63329cb152b2f1fe339a556c297f1c3ecfa75ca9a981f9dca"}, + {file = "aws-cdk.cx-api-1.124.0.tar.gz", hash = "sha256:b8ad4e1a2a5545dd256b50d36efb6d59b9b89b4b1034e7b7f9edfdaa476b181b"}, + {file = "aws_cdk.cx_api-1.124.0-py3-none-any.whl", hash = "sha256:64b6f3ba0313cdea9963f9d210932cf770366a9d860520e1f15e64a26e97c5d6"}, ] "aws-cdk.region-info" = [ - {file = "aws-cdk.region-info-1.115.0.tar.gz", hash = "sha256:4f6b282fa495c244c1f96deea4aed77e702312373204e34b3bba53da27851974"}, - {file = "aws_cdk.region_info-1.115.0-py3-none-any.whl", hash = "sha256:b346bdab4bf54a5956fab020bc085b6c2c304f485dd2d09c8fb586728dfe7c11"}, + {file = "aws-cdk.region-info-1.124.0.tar.gz", hash = "sha256:c28d31226f9000db1375044ea22ba496cc75e8c3db6aa1493a687ff0f89ccdae"}, + {file = "aws_cdk.region_info-1.124.0-py3-none-any.whl", hash = "sha256:594b5f275766b22864e6111f194cfe7a12713ffc61963d063ce06812fa484728"}, ] cattrs = [ {file = "cattrs-1.0.0-py2.py3-none-any.whl", hash = "sha256:616972ae3dfa6e623a40ad3cb845420e64942989152774ab055e5c2b2f89f997"}, {file = "cattrs-1.0.0.tar.gz", hash = "sha256:b7ab5cf8ad127c42eefd01410c1c6e28569a45a255ea80ed968511873c433c7a"}, - {file = "cattrs-1.6.0-py3-none-any.whl", hash = "sha256:c8de53900e3acad94ca83750eb12bb38aa85ce9114be47177c943e2f0eca63b0"}, - {file = "cattrs-1.6.0.tar.gz", hash = "sha256:3e2cd5dc8a1006d5da53ddcbf4f0b1dd3a21e294323b257678d0a96721f8253a"}, + {file = "cattrs-1.8.0-py3-none-any.whl", hash = 
"sha256:901fb2040529ae8fc9d93f48a2cdf7de3e983312ffb2a164ffa4e9847f253af1"}, + {file = "cattrs-1.8.0.tar.gz", hash = "sha256:5c121ab06a7cac494813c228721a7feb5a6423b17316eeaebf13f5a03e5b0d53"}, ] constructs = [ {file = "constructs-3.3.101-py3-none-any.whl", hash = "sha256:0605ea091dda433f0915ba5b3c74bf967d90fb0cf975a5c3b34a7150a3cf48d1"}, @@ -747,8 +917,8 @@ importlib-resources = [ {file = "importlib_resources-5.2.0.tar.gz", hash = "sha256:22a2c42d8c6a1d30aa8a0e1f57293725bfd5c013d562585e46aff469e0ff78b3"}, ] jsii = [ - {file = "jsii-1.32.0-py3-none-any.whl", hash = "sha256:c71321c4b74ed2c29edc9943c22a36c60a8626df6e0a7173b9ae41366b1a9cb9"}, - {file = "jsii-1.32.0.tar.gz", hash = "sha256:b95e7747812e16cafbfde80b714d9b684c7a4ee57a00cbaf8f138d5868bdb2ae"}, + {file = "jsii-1.34.0-py3-none-any.whl", hash = "sha256:d0a703d0d44bf78bb90529699599d2a58a68ca764f996808e97eafc68e2467de"}, + {file = "jsii-1.34.0.tar.gz", hash = "sha256:e72ba5fafabdd5b6a3a65bd2cf42302eb87f2fe7c6339bddb808226a91623654"}, ] publication = [ {file = "publication-0.0.3-py2.py3-none-any.whl", hash = "sha256:0248885351febc11d8a1098d5c8e3ab2dabcf3e8c0c96db1e17ecd12b53afbe6"}, diff --git a/test_infra/pyproject.toml b/test_infra/pyproject.toml index e6dda67cb..761c315d7 100644 --- a/test_infra/pyproject.toml +++ b/test_infra/pyproject.toml @@ -18,3 +18,4 @@ python = ">=3.6.2, <3.10" "aws-cdk.aws-rds" = "^1.115.0" "aws-cdk.aws-secretsmanager" = "^1.115.0" "aws-cdk.aws-ssm" = "^1.115.0" +"aws-cdk.aws-opensearchservice" = "^1.124.0" diff --git a/test_infra/scripts/delete-opensearch.sh b/test_infra/scripts/delete-opensearch.sh new file mode 100755 index 000000000..1c1c01ba2 --- /dev/null +++ b/test_infra/scripts/delete-opensearch.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +set -e + +pushd .. +cdk destroy aws-data-wrangler-opensearch +popd diff --git a/test_infra/scripts/deploy-opensearch.sh b/test_infra/scripts/deploy-opensearch.sh new file mode 100755 index 000000000..e94818af4 --- /dev/null +++ b/test_infra/scripts/deploy-opensearch.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -e + +pushd .. +cdk bootstrap +cdk deploy aws-data-wrangler-opensearch +popd diff --git a/test_infra/stacks/opensearch_stack.py b/test_infra/stacks/opensearch_stack.py new file mode 100644 index 000000000..e32ecbbaa --- /dev/null +++ b/test_infra/stacks/opensearch_stack.py @@ -0,0 +1,113 @@ +from aws_cdk import aws_ec2 as ec2 +from aws_cdk import aws_iam as iam +from aws_cdk import aws_kms as kms +from aws_cdk import aws_s3 as s3 +from aws_cdk import aws_secretsmanager as secrets +from aws_cdk import core as cdk +from aws_cdk import aws_opensearchservice as opensearch + + +def validate_domain_name(name: str): + if not 3 <= len(name) <= 28: + raise ValueError(f'invalid domain name ({name}) - bad length ({len(name)})') + for c in name: + if not ('a' <= c <= 'z' + or c.isdigit() + or c in ['-']): + raise ValueError(f'invalid domain name ({name}) - bad character ("{c}")') + + +class OpenSearchStack(cdk.Stack): # type: ignore + def __init__( + self, + scope: cdk.Construct, + construct_id: str, + vpc: ec2.IVpc, + bucket: s3.IBucket, + key: kms.Key, + **kwargs: str, + ) -> None: + """ + AWS Data Wrangler Development OpenSearch Infrastructure. + Includes OpenSearch, Elasticsearch, ... 
+ """ + super().__init__(scope, construct_id, **kwargs) + + self.vpc = vpc + self.key = key + self.bucket = bucket + + self._set_opensearch_infra() + self._setup_opensearch_1_0() + self._setup_elasticsearch_7_10_fgac() + + def _set_opensearch_infra(self) -> None: + self.username = "test" + # fmt: off + self.password_secret = secrets.Secret( + self, + "opensearch-password-secret", + secret_name="aws-data-wrangler/opensearch_password", + generate_secret_string=secrets.SecretStringGenerator(exclude_characters="/@\"\' \\"), + ).secret_value + # fmt: on + self.password = self.password_secret.to_string() + + def _setup_opensearch_1_0(self) -> None: + domain_name = 'wrangler-os-1-0' + validate_domain_name(domain_name) + domain_arn = f'arn:aws:es:{self.region}:{self.account}:domain/{domain_name}' + domain = opensearch.Domain(self, domain_name, + domain_name=domain_name, + version=opensearch.EngineVersion.OPENSEARCH_1_0, + capacity=opensearch.CapacityConfig( + data_node_instance_type='t3.small.search', + data_nodes=1 + ), + access_policies=[ + iam.PolicyStatement( + effect=iam.Effect.ALLOW, + actions=["es:*"], + principals=[iam.AccountRootPrincipal()], + resources=[f"{domain_arn}/*"] + ) + ], + removal_policy=cdk.RemovalPolicy.DESTROY + ) + + cdk.CfnOutput(self, f"DomainEndpoint-{domain_name}", value=domain.domain_endpoint) + + def _setup_elasticsearch_7_10_fgac(self) -> None: + domain_name = 'wrangler-es-7-10-fgac' + validate_domain_name(domain_name) + domain_arn = f'arn:aws:es:{self.region}:{self.account}:domain/{domain_name}' + domain = opensearch.Domain(self, domain_name, + domain_name=domain_name, + version=opensearch.EngineVersion.ELASTICSEARCH_7_10, + capacity=opensearch.CapacityConfig( + data_node_instance_type='t3.small.search', + data_nodes=1 + ), + access_policies=[ + iam.PolicyStatement( + effect=iam.Effect.ALLOW, + actions=["es:*"], + principals=[iam.AnyPrincipal()], # FGAC + resources=[f"{domain_arn}/*"] + ) + ], + fine_grained_access_control=opensearch.AdvancedSecurityOptions( + master_user_name=self.username, + # master_user_password=self.password_secret.plain_text("aws-data-wrangler/opensearch_password") + master_user_password=self.password_secret + ), + # Node-to-node encryption is required when fine-grained access control is enabled + node_to_node_encryption=True, + # Encryption-at-rest is required when fine-grained access control is enabled + encryption_at_rest=opensearch.EncryptionAtRestOptions(enabled=True, kms_key=self.key), + # Enforce HTTPS is required when fine-grained access control is enabled + enforce_https=True, + removal_policy=cdk.RemovalPolicy.DESTROY + ) + + cdk.CfnOutput(self, f"DomainEndpoint-{domain_name}", value=domain.domain_endpoint) diff --git a/tests/_utils.py b/tests/_utils.py index 85df69484..9e4b595e3 100644 --- a/tests/_utils.py +++ b/tests/_utils.py @@ -528,7 +528,7 @@ def extract_cloudformation_outputs(): client = boto3.client("cloudformation") response = try_it(client.describe_stacks, botocore.exceptions.ClientError, max_num_tries=5) for stack in response.get("Stacks"): - if (stack["StackName"] in ["aws-data-wrangler-base", "aws-data-wrangler-databases"]) and ( + if (stack["StackName"] in ["aws-data-wrangler-base", "aws-data-wrangler-databases", "aws-data-wrangler-opensearch"]) and ( stack["StackStatus"] in CFN_VALID_STATUS ): for output in stack.get("Outputs"): diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index 28fdf5c5c..84e669757 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -1,18 +1,17 @@ import 
logging import boto3 +import pytest # type: ignore import pandas as pd import json +import tempfile import awswrangler as wr +from ._utils import extract_cloudformation_outputs logging.getLogger("awswrangler").setLevel(logging.DEBUG) -# TODO: create test_infra for opensearch -OPENSEARCH_DOMAIN = 'search-es71-public-z63iyqxccc4ungar5vx45xwgfi.us-east-1.es.amazonaws.com' # change to your domain -OPENSEARCH_DOMAIN_FGAC = 'search-os1-public-urixc6vui2il7oawwiox2e57n4.us-east-1.es.amazonaws.com' -BUCKET = 'mentzera' inspections_documents = [ {"business_address":"315 California St","business_city":"San Francisco","business_id":"24936","business_latitude":"37.793199","business_location":{"lon": -122.400152,"lat": 37.793199},"business_longitude":"-122.400152","business_name":"San Francisco Soup Company","business_postal_code":"94104","business_state":"CA","inspection_date":"2016-06-09T00:00:00.000","inspection_id":"24936_20160609","inspection_score":77,"inspection_type":"Routine - Unscheduled","risk_category":"Low Risk","violation_description":"Improper food labeling or menu misrepresentation","violation_id":"24936_20160609_103141"}, @@ -23,23 +22,70 @@ {"business_address":"2162 24th Ave","business_city":"San Francisco","business_id":"5794","business_latitude":"37.747228","business_location":{"lon": -122.481299,"lat": 37.747228},"business_longitude":"-122.481299","business_name":"Soup-or-Salad","business_phone_number":"+14155752700","business_postal_code":"94116","business_state":"CA","inspection_date":"2016-09-07T00:00:00.000","inspection_id":"5794_20160907","inspection_score":96,"inspection_type":"Routine - Unscheduled","risk_category":"Low Risk","violation_description":"Unapproved or unmaintained equipment or utensils","violation_id":"5794_20160907_103144"} ] -def test_connection(): - client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) + +@pytest.fixture(scope="session") +def cloudformation_outputs(): + return extract_cloudformation_outputs() + + +@pytest.fixture(scope="session") +def opensearch_password(): + return boto3.client("secretsmanager").get_secret_value(SecretId="aws-data-wrangler/opensearch_password")["SecretString"] + + +@pytest.fixture(scope="session") +def domain_endpoint_opensearch_1_0(cloudformation_outputs): + return cloudformation_outputs["DomainEndpointwrangleros10"] + + +@pytest.fixture(scope="session") +def domain_endpoint_elasticsearch_7_10_fgac(cloudformation_outputs): + return cloudformation_outputs["DomainEndpointwrangleres710fgac"] + + +def test_connection_opensearch_1_0(domain_endpoint_opensearch_1_0): + client = wr.opensearch.connect(host=domain_endpoint_opensearch_1_0) print(client.info()) + assert len(client.info()) > 0 -# def test_fgac_connection(): -# client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN_FGAC, -# fgac_user='admin', -# fgac_password='SECRET') -# print(client.info()) +def test_connection_elasticsearch_7_10_fgac(domain_endpoint_elasticsearch_7_10_fgac, opensearch_password): + client = wr.opensearch.connect( + host=domain_endpoint_elasticsearch_7_10_fgac, + fgac_user='test', + fgac_password=opensearch_password + ) + print(client.info()) + assert len(client.info()) > 0 + + +@pytest.fixture(scope="session") +def opensearch_1_0_client(domain_endpoint_opensearch_1_0): + client = wr.opensearch.connect(host=domain_endpoint_opensearch_1_0) + return client + + +@pytest.fixture(scope="session") +def elasticsearch_7_10_fgac_client(domain_endpoint_elasticsearch_7_10_fgac, opensearch_password): + client = wr.opensearch.connect( + 
host=domain_endpoint_elasticsearch_7_10_fgac, + fgac_user='test', + fgac_password=opensearch_password + ) + return client + +# testing multiple versions +@pytest.fixture(params=['opensearch_1_0_client', 'elasticsearch_7_10_fgac_client']) +def client(request): + return request.getfixturevalue(request.param) -def test_create_index(): - client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) +def test_create_index(client): + index = 'test_create_index' + wr.opensearch.delete_index(client, index) response = wr.opensearch.create_index( - client, - index='test-index1', + client=client, + index=index, mappings={ 'properties': { 'name': {'type': 'text'}, @@ -53,12 +99,11 @@ def test_create_index(): } } ) - print(response) + assert response.get('acknowledged', False) is True -def test_delete_index(): +def test_delete_index(client): index = 'test_delete_index' - client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) wr.opensearch.create_index( client, index=index @@ -68,10 +113,10 @@ def test_delete_index(): index=index ) print(response) + assert response.get('acknowledged', False) is True -def test_index_df(): - client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) +def test_index_df(client): response = wr.opensearch.index_df(client, df=pd.DataFrame([{'_id': '1', 'name': 'John'}, {'_id': '2', 'name': 'George'}, @@ -82,8 +127,7 @@ def test_index_df(): print(response) -def test_index_documents(): - client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) +def test_index_documents(client): response = wr.opensearch.index_documents(client, documents=[{'_id': '1', 'name': 'John'}, {'_id': '2', 'name': 'George'}, @@ -94,8 +138,7 @@ def test_index_documents(): print(response) -def test_index_documents_id_keys(): - client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) +def test_index_documents_id_keys(client): response = wr.opensearch.index_documents(client, documents=inspections_documents, index='test_index_documents_id_keys', @@ -104,8 +147,7 @@ def test_index_documents_id_keys(): print(response) -def test_index_documents_no_id_keys(): - client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) +def test_index_documents_no_id_keys(client): response = wr.opensearch.index_documents(client, documents=inspections_documents, index='test_index_documents_no_id_keys' @@ -113,9 +155,8 @@ def test_index_documents_no_id_keys(): print(response) -def test_search(): +def test_search(client): index = 'test_search' - client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) response = wr.opensearch.index_documents(client, documents=inspections_documents, index=index, @@ -139,9 +180,8 @@ def test_search(): print(df.to_string()) -def test_search_filter_path(): +def test_search_filter_path(client): index = 'test_search' - client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) response = wr.opensearch.index_documents(client, documents=inspections_documents, index=index, @@ -166,9 +206,8 @@ def test_search_filter_path(): print(df.to_string()) -def test_search_scroll(): +def test_search_scroll(client): index = 'test_search_scroll' - client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) response = wr.opensearch.index_documents(client, documents=inspections_documents, index=index, @@ -186,9 +225,8 @@ def test_search_scroll(): print(df.to_string()) -def test_search_sql(): +def test_search_sql(client): index = 'test_search_sql' - client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) response = wr.opensearch.index_documents(client, documents=inspections_documents, index=index, @@ -204,9 +242,8 @@ def test_search_sql(): 
print(df.to_string()) -def test_index_json_local(): - file_path = '/tmp/inspections.json' - client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) +def test_index_json_local(client): + file_path = f'{tempfile.gettempdir()}/inspections.json' with open(file_path, 'w') as filehandle: for doc in inspections_documents: filehandle.write('%s\n' % json.dumps(doc)) @@ -218,33 +255,48 @@ print(response) -def test_index_json_s3(): - file_path = '/tmp/inspections.json' - s3_key = 'tmp/inspections.json' - client = wr.opensearch.connect(host=OPENSEARCH_DOMAIN) +def test_index_json_s3(client, path): + file_path = f'{tempfile.gettempdir()}/inspections.json' with open(file_path, 'w') as filehandle: for doc in inspections_documents: filehandle.write('%s\n' % json.dumps(doc)) s3 = boto3.client('s3') - s3.upload_file(file_path, BUCKET, s3_key) + path = f"{path}opensearch/inspections.json" + bucket, key = wr._utils.parse_path(path) + s3.upload_file(file_path, bucket, key) response = wr.opensearch.index_json( client, index='test_index_json_s3', - path=f's3://{BUCKET}/{s3_key}' + path=path ) print(response) -def test_index_csv_local(): - file_path = '/tmp/inspections.csv' +def test_index_csv_local(client): + file_path = f'{tempfile.gettempdir()}/inspections.csv' index = 'test_index_csv_local' df=pd.DataFrame(inspections_documents) df.to_csv(file_path, index=False) - client = wr.opensearch.connect(OPENSEARCH_DOMAIN) - wr.opensearch.delete_index(client, index) response = wr.opensearch.index_csv( client, path=file_path, index=index ) - print(response) \ No newline at end of file + print(response) + + +def test_index_csv_s3(client, path): + file_path = f'{tempfile.gettempdir()}/inspections.csv' + index = 'test_index_csv_s3' + df=pd.DataFrame(inspections_documents) + df.to_csv(file_path, index=False) + s3 = boto3.client('s3') + path = f"{path}opensearch/inspections.csv" + bucket, key = wr._utils.parse_path(path) + s3.upload_file(file_path, bucket, key) + response = wr.opensearch.index_csv( + client, + path=path, + index=index + ) + print(response) From d57434190b54257f6922bbebc38df21349931580 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Mon, 27 Sep 2021 23:57:20 -0400 Subject: [PATCH 14/41] [skip ci] index create/delete ignore exceptions --- awswrangler/opensearch/_write.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index ad845c2b1..e2c76e115 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -144,7 +144,9 @@ def create_index( body['settings'] = settings if body == {}: body = None - response = client.indices.create(index, body, ignore=[400, 404]) + + # ignore 400 caused by IndexAlreadyExistsException when creating an index + response = client.indices.create(index, body, ignore=400) if 'error' in response: _logger.warning(response) if str(response['error']).startswith(u'MapperParsingException'): raise ValueError(response['error']) @@ -182,6 +184,8 @@
) """ + + # ignore 400/404 IndexNotFoundError exception response = client.indices.delete(index, ignore=[400, 404]) if 'error' in response: _logger.warning(response) @@ -381,7 +385,7 @@ def index_df( def index_documents( client: Elasticsearch, - documents: Union[Iterable[Dict[str, Any]], Iterable[Mapping[str, Any]]], + documents: Iterable[Mapping[str, Any]], index: str, doc_type: Optional[str] = None, keys_to_write: Optional[List[str]] = None, From 7bb6779c84a52b3db473fc4491cd4389ddb417d2 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Mon, 27 Sep 2021 23:59:31 -0400 Subject: [PATCH 15/41] [skip ci] index_documents documents type --- awswrangler/opensearch/_write.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index e2c76e115..6126d0dfb 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -411,7 +411,7 @@ def index_documents( ---------- client : Elasticsearch instance of elasticsearch.Elasticsearch to use. - documents : Union[Iterable[Dict[str, Any]], Iterable[Mapping[str, Any]]] + documents : Iterable[Mapping[str, Any]] List which contains the documents that will be inserted. index : str Name of the index. From 75a2701617373ada50b64b024a29e65ef34fb66f Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Tue, 28 Sep 2021 00:42:39 -0400 Subject: [PATCH 16/41] [skip ci] removed pandasticsearch dependency --- awswrangler/opensearch/_read.py | 50 ++++++++++++++++++++++++--------- tests/test_opensearch.py | 8 ++++++ 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/awswrangler/opensearch/_read.py b/awswrangler/opensearch/_read.py index 80ec509d5..ef64bd275 100644 --- a/awswrangler/opensearch/_read.py +++ b/awswrangler/opensearch/_read.py @@ -1,6 +1,5 @@ """Amazon OpenSearch Read Module (PRIVATE).""" -from pandasticsearch import Select, DataFrame from typing import Any, Dict, Optional from elasticsearch import Elasticsearch from elasticsearch.helpers import scan @@ -8,6 +7,37 @@ import pandas as pd +def _resolve_fields(row): + fields = {} + for field in row: + if isinstance(row[field], dict): + nested_fields = _resolve_fields(row[field]) + for n_field, val in nested_fields.items(): + fields["{}.{}".format(field, n_field)] = val + else: + fields[field] = row[field] + return fields + + +def _hit_to_row(hit): + row = {} + for k in hit.keys(): + if k == '_source': + solved_fields = _resolve_fields(hit['_source']) + row.update(solved_fields) + elif k.startswith('_'): + row[k] = hit[k] + return row + + +def _search_response_to_documents(response: dict): + return [_hit_to_row(hit) for hit in response['hits']['hits']] + + +def _search_response_to_df(response: dict): + return pd.DataFrame(_search_response_to_documents(response)) + + def search( client: Elasticsearch, index: Optional[str] = '_all', @@ -15,7 +45,7 @@ def search( doc_type: Optional[str] = None, is_scroll: Optional[bool] = False, **kwargs -) -> DataFrame: +) -> pd.DataFrame: """Returns results matching query DSL as pandas dataframe. 
Parameters @@ -65,10 +95,6 @@ def search( if doc_type: kwargs['doc_type'] = doc_type - # pandasticsearch.Select.from_dict requires `took` field - if 'filter_path' in kwargs: - if 'took' not in kwargs['filter_path']: - kwargs['filter_path'].append('took') if is_scroll: documents_generator = scan( client, @@ -76,12 +102,11 @@ def search( query=search_body, **kwargs ) - s = Select() - documents = map(lambda x: s.hit_to_row(x), documents_generator) + documents = map(lambda x: _hit_to_row(x), documents_generator) df = pd.DataFrame(documents) else: - documents = client.search(index=index, body=search_body, **kwargs) - df = Select.from_dict(documents).to_pandas() + response = client.search(index=index, body=search_body, **kwargs) + df = _search_response_to_df(response) return df @@ -89,7 +114,7 @@ def search_by_sql( client: Elasticsearch, sql_query: str, **kwargs -) -> DataFrame: +) -> pd.DataFrame: """Returns results matching [SQL query](https://opensearch.org/docs/search-plugins/sql/index/) as pandas dataframe Parameters @@ -144,6 +169,5 @@ def _sql_response_to_docs(response: Dict[str, Any]): body=body, params=kwargs ) - - df = Select.from_dict(response).to_pandas() + df = _search_response_to_df(response) return df diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index 84e669757..c409e368b 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -178,6 +178,7 @@ def test_search(client): print('') print(df.to_string()) + assert df.shape[0] == 3 def test_search_filter_path(client): @@ -204,6 +205,7 @@ def test_search_filter_path(client): print('') print(df.to_string()) + assert df.shape[0] == 3 def test_search_scroll(client): @@ -223,6 +225,7 @@ def test_search_scroll(client): print('') print(df.to_string()) + assert df.shape[0] == 5 def test_search_sql(client): @@ -240,6 +243,7 @@ def test_search_sql(client): print('') print(df.to_string()) + assert df.shape[0] == 5 def test_index_json_local(client): @@ -253,6 +257,7 @@ def test_index_json_local(client): path=file_path ) print(response) + assert response.get('success', 0) == 6 def test_index_json_s3(client, path): @@ -270,6 +275,7 @@ def test_index_json_s3(client, path): path=path ) print(response) + assert response.get('success', 0) == 6 def test_index_csv_local(client): @@ -283,6 +289,7 @@ def test_index_csv_local(client): index=index ) print(response) + assert response.get('success', 0) == 6 def test_index_csv_s3(client, path): @@ -300,3 +307,4 @@ def test_index_csv_s3(client, path): index=index ) print(response) + assert response.get('success', 0) == 6 From cea9abbc3c6b0b07ad4652dc92277110d484eee2 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Tue, 28 Sep 2021 00:44:52 -0400 Subject: [PATCH 17/41] [skip ci] port typo --- awswrangler/opensearch/_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/awswrangler/opensearch/_utils.py b/awswrangler/opensearch/_utils.py index 1bbfeedf7..143fdb366 100644 --- a/awswrangler/opensearch/_utils.py +++ b/awswrangler/opensearch/_utils.py @@ -5,7 +5,6 @@ import boto3 import logging -from awswrangler import _utils, exceptions from elasticsearch import Elasticsearch, RequestsHttpConnection from requests_aws4auth import AWS4Auth @@ -77,7 +76,7 @@ def connect( valid_ports = {80, 443} if port not in valid_ports: - raise ValueError("results: status must be one of %r." % valid_ports) + raise ValueError("results: port must be one of %r." 
% valid_ports) if fgac_user and fgac_password: http_auth = (fgac_user, fgac_password) From f6c7dd4c6533540bb53edf000af1d97e0d518061 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Tue, 28 Sep 2021 00:57:50 -0400 Subject: [PATCH 18/41] [skip ci] enforced_pandas_params --- awswrangler/opensearch/_write.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index 6126d0dfb..0df6cee64 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -63,8 +63,6 @@ def _deserialize(v): df_iter = df.iterrows() for i, document in df_iter: - # print(document) - # yield document yield {k: _deserialize(v) for k, v in document.items() if notna(v)} @@ -281,8 +279,9 @@ Name of the document type (only for Elasticsearch versions 5.x and older). pandas_kwargs : Dictionary of arguments forwarded to pandas.read_csv(). - e.g. pandas_kwargs={'sep': '|', 'na_values': ['null', 'none'], 'skip_blank_lines': True} + e.g. pandas_kwargs={'sep': '|', 'na_values': ['null', 'none']} https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html + Note: these parameter values are enforced: `skip_blank_lines=True` **kwargs : KEYWORD arguments forwarded to :func:`~awswrangler.opensearch.index_documents` which is used to execute the operation @@ -313,14 +312,15 @@ ... client=client, ... path='docs.csv', ... index='sample-index1', - ... pandas_kwargs={'sep': '|', 'na_values': ['null', 'none'], 'skip_blank_lines': True} + ... pandas_kwargs={'sep': '|', 'na_values': ['null', 'none']} ... ) """ - custom_pandas_params = { + enforced_pandas_params = { 'skip_blank_lines': True, - 'na_filter': True # will generate Nan value for empty cells. We remove Nan keys in _df_doc_generator + # 'na_filter': True # will generate Nan value for empty cells. 
We remove Nan keys in _df_doc_generator + # Note: if the user will pass na_filter=False null fields will be indexed as well ({"k1": null, "k2": null}) } - pandas_kwargs.update(custom_pandas_params) + pandas_kwargs.update(enforced_pandas_params) df = pd.read_csv(path, **pandas_kwargs) return index_df( client, From 030e21c0aa00e7c82d4578ea2bf21b680850310e Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Tue, 28 Sep 2021 02:06:24 -0400 Subject: [PATCH 19/41] [skip ci] isort & black --- awswrangler/opensearch/__init__.py | 25 +- awswrangler/opensearch/_read.py | 55 ++-- awswrangler/opensearch/_utils.py | 25 +- awswrangler/opensearch/_write.py | 131 ++++------ test_infra/app.py | 2 +- test_infra/stacks/opensearch_stack.py | 58 ++--- tests/_utils.py | 7 +- tests/test_opensearch.py | 344 ++++++++++++++------------ 8 files changed, 315 insertions(+), 332 deletions(-) diff --git a/awswrangler/opensearch/__init__.py b/awswrangler/opensearch/__init__.py index 222519747..205e70b59 100644 --- a/awswrangler/opensearch/__init__.py +++ b/awswrangler/opensearch/__init__.py @@ -1,16 +1,17 @@ """Utilities Module for Amazon OpenSearch.""" -from awswrangler.opensearch._utils import connect -from awswrangler.opensearch._write import create_index, delete_index, index_csv, index_documents, index_df, index_json from awswrangler.opensearch._read import search, search_by_sql +from awswrangler.opensearch._utils import connect +from awswrangler.opensearch._write import create_index, delete_index, index_csv, index_df, index_documents, index_json -__all__ = ["connect", - "create_index", - "delete_index", - "index_csv", - "index_documents", - "index_df", - "index_json", - "search", - "search_by_sql" - ] +__all__ = [ + "connect", + "create_index", + "delete_index", + "index_csv", + "index_documents", + "index_df", + "index_json", + "search", + "search_by_sql", +] diff --git a/awswrangler/opensearch/_read.py b/awswrangler/opensearch/_read.py index ef64bd275..33a2a9b2c 100644 --- a/awswrangler/opensearch/_read.py +++ b/awswrangler/opensearch/_read.py @@ -1,10 +1,12 @@ """Amazon OpenSearch Read Module (PRIVATE).""" from typing import Any, Dict, Optional + +import pandas as pd from elasticsearch import Elasticsearch from elasticsearch.helpers import scan + from awswrangler.opensearch._utils import _get_distribution -import pandas as pd def _resolve_fields(row): @@ -22,16 +24,16 @@ def _resolve_fields(row): def _hit_to_row(hit): row = {} for k in hit.keys(): - if k == '_source': - solved_fields = _resolve_fields(hit['_source']) + if k == "_source": + solved_fields = _resolve_fields(hit["_source"]) row.update(solved_fields) - elif k.startswith('_'): + elif k.startswith("_"): row[k] = hit[k] return row def _search_response_to_documents(response: dict): - return [_hit_to_row(hit) for hit in response['hits']['hits']] + return [_hit_to_row(hit) for hit in response["hits"]["hits"]] def _search_response_to_df(response: dict): @@ -40,11 +42,11 @@ def _search_response_to_df(response: dict): def search( client: Elasticsearch, - index: Optional[str] = '_all', + index: Optional[str] = "_all", search_body: Optional[Dict[str, Any]] = None, doc_type: Optional[str] = None, is_scroll: Optional[bool] = False, - **kwargs + **kwargs, ) -> pd.DataFrame: """Returns results matching query DSL as pandas dataframe. 
@@ -93,15 +95,10 @@ def search( """ if doc_type: - kwargs['doc_type'] = doc_type + kwargs["doc_type"] = doc_type if is_scroll: - documents_generator = scan( - client, - index=index, - query=search_body, - **kwargs - ) + documents_generator = scan(client, index=index, query=search_body, **kwargs) documents = map(lambda x: _hit_to_row(x), documents_generator) df = pd.DataFrame(documents) else: @@ -110,11 +107,7 @@ def search( return df -def search_by_sql( - client: Elasticsearch, - sql_query: str, - **kwargs -) -> pd.DataFrame: +def search_by_sql(client: Elasticsearch, sql_query: str, **kwargs) -> pd.DataFrame: """Returns results matching [SQL query](https://opensearch.org/docs/search-plugins/sql/index/) as pandas dataframe Parameters @@ -147,27 +140,23 @@ def search_by_sql( # can be used if not passing format def _sql_response_to_docs(response: Dict[str, Any]): - header = list(map(lambda x: x['name'], response.get('schema', []))) - for datarow in response.get('datarows', []): + header = list(map(lambda x: x["name"], response.get("schema", []))) + for datarow in response.get("datarows", []): yield dict(zip(header, datarow)) - if _get_distribution(client) == 'opensearch': - url = '/_plugins/_sql' + if _get_distribution(client) == "opensearch": + url = "/_plugins/_sql" else: - url = '/_opendistro/_sql' + url = "/_opendistro/_sql" - kwargs['format'] = 'json' - body = {'query': sql_query} - for size_att in ['size', 'fetch_size']: + kwargs["format"] = "json" + body = {"query": sql_query} + for size_att in ["size", "fetch_size"]: if size_att in kwargs: - body['fetch_size'] = kwargs[size_att] + body["fetch_size"] = kwargs[size_att] del kwargs[size_att] # unrecognized parameter response = client.transport.perform_request( - "POST", - url, - headers={'Content-Type': 'application/json'}, - body=body, - params=kwargs + "POST", url, headers={"Content-Type": "application/json"}, body=body, params=kwargs ) df = _search_response_to_df(response) return df diff --git a/awswrangler/opensearch/_utils.py b/awswrangler/opensearch/_utils.py index 143fdb366..f15f6105c 100644 --- a/awswrangler/opensearch/_utils.py +++ b/awswrangler/opensearch/_utils.py @@ -1,29 +1,27 @@ """Amazon OpenSearch Utils Module (PRIVATE).""" +import logging from typing import Optional import boto3 -import logging - from elasticsearch import Elasticsearch, RequestsHttpConnection from requests_aws4auth import AWS4Auth - _logger: logging.Logger = logging.getLogger(__name__) -def _get_distribution(client: Elasticsearch): - return client.info().get('version', {}).get('distribution', 'elasticsearch') +def _get_distribution(client: Elasticsearch) -> str: + return client.info().get("version", {}).get("distribution", "elasticsearch") def _get_version(client: Elasticsearch): - return client.info().get('version', {}).get('number') + return client.info().get("version", {}).get("number") def _get_version_major(client: Elasticsearch): version = _get_version(client) if version: - return int(version.split('.')[0]) + return int(version.split(".")[0]) return None @@ -33,8 +31,7 @@ def connect( boto3_session: Optional[boto3.Session] = boto3.Session(), region: Optional[str] = None, fgac_user: Optional[str] = None, - fgac_password: Optional[str] = None - + fgac_password: Optional[str] = None, ) -> Elasticsearch: """Creates a secure connection to the specified Amazon OpenSearch domain. 
@@ -84,13 +81,7 @@ def connect( if region is None: region = boto3_session.region_name creds = boto3_session.get_credentials() - http_auth = AWS4Auth( - creds.access_key, - creds.secret_key, - region, - 'es', - creds.token - ) + http_auth = AWS4Auth(creds.access_key, creds.secret_key, region, "es", creds.token) try: es = Elasticsearch( host=host, @@ -98,7 +89,7 @@ def connect( http_auth=http_auth, use_ssl=True, verify_certs=True, - connection_class=RequestsHttpConnection + connection_class=RequestsHttpConnection, ) except Exception as e: _logger.error("Error connecting to Opensearch cluster. Please verify authentication details") diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index 0df6cee64..ace119b02 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -1,19 +1,20 @@ """Amazon OpenSearch Write Module (PRIVATE).""" +import ast +import json import logging import uuid -import boto3 -import json -import ast from pathlib import Path -from typing import Any, Dict, List, Mapping, Optional, Union, Tuple, Iterable -from awswrangler.opensearch._utils import _get_distribution, _get_version_major -from awswrangler._utils import parse_path -import pandas as pd -from pandas import notna +from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, Union +import boto3 +import pandas as pd from elasticsearch import Elasticsearch from elasticsearch.helpers import bulk +from pandas import notna + +from awswrangler._utils import parse_path +from awswrangler.opensearch._utils import _get_distribution, _get_version_major _logger: logging.Logger = logging.getLogger(__name__) @@ -21,43 +22,42 @@ def _selected_keys(document: Dict, keys_to_write: Optional[List[str]]): if keys_to_write is None: keys_to_write = document.keys() - keys_to_write = filter(lambda x: x != '_id', keys_to_write) - return {key: document[key] for key in keys_to_write } + keys_to_write = filter(lambda x: x != "_id", keys_to_write) + return {key: document[key] for key in keys_to_write} -def _actions_generator(documents: Union[Iterable[Dict[str, Any]], Iterable[Mapping[str, Any]]], - index: str, - doc_type: Optional[str], - keys_to_write: Optional[List[str]], - id_keys: Optional[List[str]]): +def _actions_generator( + documents: Union[Iterable[Dict[str, Any]], Iterable[Mapping[str, Any]]], + index: str, + doc_type: Optional[str], + keys_to_write: Optional[List[str]], + id_keys: Optional[List[str]], +): for document in documents: if id_keys: - _id = '-'.join(list(map(lambda x: str(document[x]), id_keys))) + _id = "-".join(list(map(lambda x: str(document[x]), id_keys))) else: - _id = document.get('_id', uuid.uuid4()) + _id = document.get("_id", uuid.uuid4()) yield { - "_index": index, - "_type": doc_type, - "_id" : _id, - "_source": _selected_keys(document, keys_to_write), - } + "_index": index, + "_type": doc_type, + "_id": _id, + "_source": _selected_keys(document, keys_to_write), + } def _df_doc_generator(df: pd.DataFrame): def _deserialize(v): if isinstance(v, str): v = v.strip() - if (v.startswith('{') and v.endswith('}') - or - v.startswith('[') and v.endswith(']') - ): + if v.startswith("{") and v.endswith("}") or v.startswith("[") and v.endswith("]"): try: v = json.loads(v) except Exception as e: try: v = ast.literal_eval(v) # if properties are enclosed with single quotes except: - _logger.warning(f'could not convert string to json: {v}') + _logger.warning(f"could not convert string to json: {v}") _logger.warning(e) return v @@ -80,7 +80,7 @@ def create_index( 
index: str, doc_type: Optional[str] = None, settings: Optional[Dict[str, Any]] = None, - mappings: Optional[Dict[str, Any]] = None + mappings: Optional[Dict[str, Any]] = None, ) -> Dict[str, Any]: """Creates an index. @@ -131,31 +131,28 @@ body = {} if mappings: - if _get_distribution(client) == 'opensearch' or _get_version_major(client) >= 7: - body['mappings'] = mappings # doc type deprecated + if _get_distribution(client) == "opensearch" or _get_version_major(client) >= 7: + body["mappings"] = mappings # doc type deprecated else: if doc_type: - body['mappings'] = {doc_type: mappings} + body["mappings"] = {doc_type: mappings} else: - body['mappings'] = {index: mappings} + body["mappings"] = {index: mappings} if settings: - body['settings'] = settings + body["settings"] = settings if body == {}: body = None # ignore 400 caused by IndexAlreadyExistsException when creating an index response = client.indices.create(index, body, ignore=400) - if 'error' in response: + if "error" in response: _logger.warning(response) - if str(response['error']).startswith(u'MapperParsingException'): - raise ValueError(response['error']) + if str(response["error"]).startswith("MapperParsingException"): + raise ValueError(response["error"]) return response -def delete_index( - client: Elasticsearch, - index: str -) -> Dict[str, Any]: +def delete_index(client: Elasticsearch, index: str) -> Dict[str, Any]: """Deletes an index. Parameters @@ -185,7 +182,7 @@ # ignore 400/404 IndexNotFoundError exception response = client.indices.delete(index, ignore=[400, 404]) - if 'error' in response: + if "error" in response: _logger.warning(response) return response @@ -196,7 +193,7 @@ index: str, doc_type: Optional[str] = None, boto3_session: Optional[boto3.Session] = boto3.Session(), - **kwargs + **kwargs, ) -> Dict[str, Any]: """Index all documents from JSON file to OpenSearch index. @@ -241,20 +238,14 @@ if path.startswith("s3://"): bucket, key = parse_path(path) - s3 = boto3_session.client('s3') + s3 = boto3_session.client("s3") obj = s3.get_object(Bucket=bucket, Key=key) - body = obj['Body'].read() + body = obj["Body"].read() lines = body.splitlines() documents = map(lambda x: json.loads(x), lines) - else: # local path + else: # local path documents = _file_line_generator(path, is_json=True) - return index_documents( - client=client, - documents=documents, - index=index, - doc_type=doc_type, - **kwargs - ) + return index_documents(client=client, documents=documents, index=index, doc_type=doc_type, **kwargs) def index_csv( @@ -263,7 +254,7 @@ index: str, doc_type: Optional[str] = None, pandas_kwargs: Optional[Dict[str, Any]] = {}, - **kwargs + **kwargs, ) -> Dict[str, Any]: """Index all documents from a CSV file to OpenSearch index. @@ -316,27 +307,17 @@ ... ) """ enforced_pandas_params = { - 'skip_blank_lines': True, + "skip_blank_lines": True, # 'na_filter': True # will generate Nan value for empty cells. 
We remove Nan keys in _df_doc_generator # Note: if the user will pass na_filter=False null fields will be indexed as well ({"k1": null, "k2": null}) } pandas_kwargs.update(enforced_pandas_params) df = pd.read_csv(path, **pandas_kwargs) - return index_df( - client, - df=df, - index=index, - doc_type=doc_type, - **kwargs - ) + return index_df(client, df=df, index=index, doc_type=doc_type, **kwargs) def index_df( - client: Elasticsearch, - df: pd.DataFrame, - index: str, - doc_type: Optional[str] = None, - **kwargs + client: Elasticsearch, df: pd.DataFrame, index: str, doc_type: Optional[str] = None, **kwargs ) -> Dict[str, Any]: """Index all documents from a DataFrame to OpenSearch index. @@ -374,13 +355,7 @@ def index_df( ... ) """ - return index_documents( - client=client, - documents=_df_doc_generator(df), - index=index, - doc_type=doc_type, - **kwargs - ) + return index_documents(client=client, documents=_df_doc_generator(df), index=index, doc_type=doc_type, **kwargs) def index_documents( @@ -396,8 +371,7 @@ def index_documents( max_retries: Optional[int] = 0, initial_backoff: Optional[int] = 2, max_backoff: Optional[int] = 600, - **kwargs - + **kwargs, ) -> Dict[str, Any]: """Index all documents to OpenSearch index. @@ -467,9 +441,6 @@ def index_documents( max_retries=max_retries, initial_backoff=initial_backoff, max_backoff=max_backoff, - **kwargs + **kwargs, ) - return { - 'success': success, - 'errors': errors - } + return {"success": success, "errors": errors} diff --git a/test_infra/app.py b/test_infra/app.py index b14c1fc81..8c3395e22 100644 --- a/test_infra/app.py +++ b/test_infra/app.py @@ -16,7 +16,7 @@ ) OpenSearchStack( -app, + app, "aws-data-wrangler-opensearch", base.get_vpc, base.get_bucket, diff --git a/test_infra/stacks/opensearch_stack.py b/test_infra/stacks/opensearch_stack.py index e32ecbbaa..d5f6d1c67 100644 --- a/test_infra/stacks/opensearch_stack.py +++ b/test_infra/stacks/opensearch_stack.py @@ -1,19 +1,17 @@ from aws_cdk import aws_ec2 as ec2 from aws_cdk import aws_iam as iam from aws_cdk import aws_kms as kms +from aws_cdk import aws_opensearchservice as opensearch from aws_cdk import aws_s3 as s3 from aws_cdk import aws_secretsmanager as secrets from aws_cdk import core as cdk -from aws_cdk import aws_opensearchservice as opensearch def validate_domain_name(name: str): if not 3 <= len(name) <= 28: - raise ValueError(f'invalid domain name ({name}) - bad length ({len(name)})') + raise ValueError(f"invalid domain name ({name}) - bad length ({len(name)})") for c in name: - if not ('a' <= c <= 'z' - or c.isdigit() - or c in ['-']): + if not ("a" <= c <= "z" or c.isdigit() or c in ["-"]): raise ValueError(f'invalid domain name ({name}) - bad character ("{c}")') @@ -54,60 +52,64 @@ def _set_opensearch_infra(self) -> None: self.password = self.password_secret.to_string() def _setup_opensearch_1_0(self) -> None: - domain_name = 'wrangler-os-1-0' + domain_name = "wrangler-os-1-0" validate_domain_name(domain_name) - domain_arn = f'arn:aws:es:{self.region}:{self.account}:domain/{domain_name}' - domain = opensearch.Domain(self, domain_name, + domain_arn = f"arn:aws:es:{self.region}:{self.account}:domain/{domain_name}" + domain = opensearch.Domain( + self, + domain_name, domain_name=domain_name, version=opensearch.EngineVersion.OPENSEARCH_1_0, capacity=opensearch.CapacityConfig( - data_node_instance_type='t3.small.search', - data_nodes=1 + data_node_instance_type="t3.small.search", data_nodes=1 ), access_policies=[ iam.PolicyStatement( effect=iam.Effect.ALLOW, 
actions=["es:*"], principals=[iam.AccountRootPrincipal()], - resources=[f"{domain_arn}/*"] + resources=[f"{domain_arn}/*"], ) ], - removal_policy=cdk.RemovalPolicy.DESTROY + removal_policy=cdk.RemovalPolicy.DESTROY, ) - cdk.CfnOutput(self, f"DomainEndpoint-{domain_name}", value=domain.domain_endpoint) + cdk.CfnOutput( + self, f"DomainEndpoint-{domain_name}", value=domain.domain_endpoint + ) def _setup_elasticsearch_7_10_fgac(self) -> None: - domain_name = 'wrangler-es-7-10-fgac' + domain_name = "wrangler-es-7-10-fgac" validate_domain_name(domain_name) - domain_arn = f'arn:aws:es:{self.region}:{self.account}:domain/{domain_name}' - domain = opensearch.Domain(self, domain_name, + domain_arn = f"arn:aws:es:{self.region}:{self.account}:domain/{domain_name}" + domain = opensearch.Domain( + self, + domain_name, domain_name=domain_name, version=opensearch.EngineVersion.ELASTICSEARCH_7_10, capacity=opensearch.CapacityConfig( - data_node_instance_type='t3.small.search', - data_nodes=1 + data_node_instance_type="t3.small.search", data_nodes=1 ), access_policies=[ iam.PolicyStatement( effect=iam.Effect.ALLOW, actions=["es:*"], - principals=[iam.AnyPrincipal()], # FGAC - resources=[f"{domain_arn}/*"] + principals=[iam.AnyPrincipal()], # FGACs + resources=[f"{domain_arn}/*"], ) ], fine_grained_access_control=opensearch.AdvancedSecurityOptions( master_user_name=self.username, - # master_user_password=self.password_secret.plain_text("aws-data-wrangler/opensearch_password") - master_user_password=self.password_secret + master_user_password=self.password_secret, ), - # Node-to-node encryption is required when fine-grained access control is enabled node_to_node_encryption=True, - # Encryption-at-rest is required when fine-grained access control is enabled - encryption_at_rest=opensearch.EncryptionAtRestOptions(enabled=True, kms_key=self.key), - # Enforce HTTPS is required when fine-grained access control is enabled + encryption_at_rest=opensearch.EncryptionAtRestOptions( + enabled=True, kms_key=self.key + ), enforce_https=True, - removal_policy=cdk.RemovalPolicy.DESTROY + removal_policy=cdk.RemovalPolicy.DESTROY, ) - cdk.CfnOutput(self, f"DomainEndpoint-{domain_name}", value=domain.domain_endpoint) + cdk.CfnOutput( + self, f"DomainEndpoint-{domain_name}", value=domain.domain_endpoint + ) diff --git a/tests/_utils.py b/tests/_utils.py index 9e4b595e3..5f74c4e83 100644 --- a/tests/_utils.py +++ b/tests/_utils.py @@ -528,9 +528,10 @@ def extract_cloudformation_outputs(): client = boto3.client("cloudformation") response = try_it(client.describe_stacks, botocore.exceptions.ClientError, max_num_tries=5) for stack in response.get("Stacks"): - if (stack["StackName"] in ["aws-data-wrangler-base", "aws-data-wrangler-databases", "aws-data-wrangler-opensearch"]) and ( - stack["StackStatus"] in CFN_VALID_STATUS - ): + if ( + stack["StackName"] + in ["aws-data-wrangler-base", "aws-data-wrangler-databases", "aws-data-wrangler-opensearch"] + ) and (stack["StackStatus"] in CFN_VALID_STATUS): for output in stack.get("Outputs"): outputs[output.get("OutputKey")] = output.get("OutputValue") return outputs diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index c409e368b..68a5f4f8b 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -1,10 +1,10 @@ +import json import logging +import tempfile import boto3 -import pytest # type: ignore import pandas as pd -import json -import tempfile +import pytest # type: ignore import awswrangler as wr @@ -14,12 +14,110 @@ inspections_documents = [ 
-{"business_address":"315 California St","business_city":"San Francisco","business_id":"24936","business_latitude":"37.793199","business_location":{"lon": -122.400152,"lat": 37.793199},"business_longitude":"-122.400152","business_name":"San Francisco Soup Company","business_postal_code":"94104","business_state":"CA","inspection_date":"2016-06-09T00:00:00.000","inspection_id":"24936_20160609","inspection_score":77,"inspection_type":"Routine - Unscheduled","risk_category":"Low Risk","violation_description":"Improper food labeling or menu misrepresentation","violation_id":"24936_20160609_103141"}, -{"business_address":"10 Mason St","business_city":"San Francisco","business_id":"60354","business_latitude":"37.783527","business_location":{"lon": -122.409061,"lat": 37.783527},"business_longitude":"-122.409061","business_name":"Soup Unlimited","business_postal_code":"94102","business_state":"CA","inspection_date":"2016-11-23T00:00:00.000","inspection_id":"60354_20161123","inspection_type":"Routine", "inspection_score": 95}, -{"business_address":"2872 24th St","business_city":"San Francisco","business_id":"1797","business_latitude":"37.752807","business_location":{"lon": -122.409752,"lat": 37.752807},"business_longitude":"-122.409752","business_name":"TIO CHILOS GRILL","business_postal_code":"94110","business_state":"CA","inspection_date":"2016-07-05T00:00:00.000","inspection_id":"1797_20160705","inspection_score":90,"inspection_type":"Routine - Unscheduled","risk_category":"Low Risk","violation_description":"Unclean nonfood contact surfaces","violation_id":"1797_20160705_103142"}, -{"business_address":"1661 Tennessee St Suite 3B","business_city":"San Francisco Whard Restaurant","business_id":"66198","business_latitude":"37.75072","business_location":{"lon": -122.388478,"lat": 37.75072},"business_longitude":"-122.388478","business_name":"San Francisco Restaurant","business_postal_code":"94107","business_state":"CA","inspection_date":"2016-05-27T00:00:00.000","inspection_id":"66198_20160527","inspection_type":"Routine","inspection_score":56 }, -{"business_address":"2162 24th Ave","business_city":"San Francisco","business_id":"5794","business_latitude":"37.747228","business_location":{"lon": -122.481299,"lat": 37.747228},"business_longitude":"-122.481299","business_name":"Soup House","business_phone_number":"+14155752700","business_postal_code":"94116","business_state":"CA","inspection_date":"2016-09-07T00:00:00.000","inspection_id":"5794_20160907","inspection_score":96,"inspection_type":"Routine - Unscheduled","risk_category":"Low Risk","violation_description":"Unapproved or unmaintained equipment or utensils","violation_id":"5794_20160907_103144"}, -{"business_address":"2162 24th Ave","business_city":"San Francisco","business_id":"5794","business_latitude":"37.747228","business_location":{"lon": -122.481299,"lat": 37.747228},"business_longitude":"-122.481299","business_name":"Soup-or-Salad","business_phone_number":"+14155752700","business_postal_code":"94116","business_state":"CA","inspection_date":"2016-09-07T00:00:00.000","inspection_id":"5794_20160907","inspection_score":96,"inspection_type":"Routine - Unscheduled","risk_category":"Low Risk","violation_description":"Unapproved or unmaintained equipment or utensils","violation_id":"5794_20160907_103144"} + { + "business_address": "315 California St", + "business_city": "San Francisco", + "business_id": "24936", + "business_latitude": "37.793199", + "business_location": {"lon": -122.400152, "lat": 37.793199}, + "business_longitude": 
"-122.400152", + "business_name": "San Francisco Soup Company", + "business_postal_code": "94104", + "business_state": "CA", + "inspection_date": "2016-06-09T00:00:00.000", + "inspection_id": "24936_20160609", + "inspection_score": 77, + "inspection_type": "Routine - Unscheduled", + "risk_category": "Low Risk", + "violation_description": "Improper food labeling or menu misrepresentation", + "violation_id": "24936_20160609_103141", + }, + { + "business_address": "10 Mason St", + "business_city": "San Francisco", + "business_id": "60354", + "business_latitude": "37.783527", + "business_location": {"lon": -122.409061, "lat": 37.783527}, + "business_longitude": "-122.409061", + "business_name": "Soup Unlimited", + "business_postal_code": "94102", + "business_state": "CA", + "inspection_date": "2016-11-23T00:00:00.000", + "inspection_id": "60354_20161123", + "inspection_type": "Routine", + "inspection_score": 95, + }, + { + "business_address": "2872 24th St", + "business_city": "San Francisco", + "business_id": "1797", + "business_latitude": "37.752807", + "business_location": {"lon": -122.409752, "lat": 37.752807}, + "business_longitude": "-122.409752", + "business_name": "TIO CHILOS GRILL", + "business_postal_code": "94110", + "business_state": "CA", + "inspection_date": "2016-07-05T00:00:00.000", + "inspection_id": "1797_20160705", + "inspection_score": 90, + "inspection_type": "Routine - Unscheduled", + "risk_category": "Low Risk", + "violation_description": "Unclean nonfood contact surfaces", + "violation_id": "1797_20160705_103142", + }, + { + "business_address": "1661 Tennessee St Suite 3B", + "business_city": "San Francisco Whard Restaurant", + "business_id": "66198", + "business_latitude": "37.75072", + "business_location": {"lon": -122.388478, "lat": 37.75072}, + "business_longitude": "-122.388478", + "business_name": "San Francisco Restaurant", + "business_postal_code": "94107", + "business_state": "CA", + "inspection_date": "2016-05-27T00:00:00.000", + "inspection_id": "66198_20160527", + "inspection_type": "Routine", + "inspection_score": 56, + }, + { + "business_address": "2162 24th Ave", + "business_city": "San Francisco", + "business_id": "5794", + "business_latitude": "37.747228", + "business_location": {"lon": -122.481299, "lat": 37.747228}, + "business_longitude": "-122.481299", + "business_name": "Soup House", + "business_phone_number": "+14155752700", + "business_postal_code": "94116", + "business_state": "CA", + "inspection_date": "2016-09-07T00:00:00.000", + "inspection_id": "5794_20160907", + "inspection_score": 96, + "inspection_type": "Routine - Unscheduled", + "risk_category": "Low Risk", + "violation_description": "Unapproved or unmaintained equipment or utensils", + "violation_id": "5794_20160907_103144", + }, + { + "business_address": "2162 24th Ave", + "business_city": "San Francisco", + "business_id": "5794", + "business_latitude": "37.747228", + "business_location": {"lon": -122.481299, "lat": 37.747228}, + "business_longitude": "-122.481299", + "business_name": "Soup-or-Salad", + "business_phone_number": "+14155752700", + "business_postal_code": "94116", + "business_state": "CA", + "inspection_date": "2016-09-07T00:00:00.000", + "inspection_id": "5794_20160907", + "inspection_score": 96, + "inspection_type": "Routine - Unscheduled", + "risk_category": "Low Risk", + "violation_description": "Unapproved or unmaintained equipment or utensils", + "violation_id": "5794_20160907_103144", + }, ] @@ -30,7 +128,9 @@ def cloudformation_outputs(): 
@pytest.fixture(scope="session") def opensearch_password(): - return boto3.client("secretsmanager").get_secret_value(SecretId="aws-data-wrangler/opensearch_password")["SecretString"] + return boto3.client("secretsmanager").get_secret_value(SecretId="aws-data-wrangler/opensearch_password")[ + "SecretString" + ] @pytest.fixture(scope="session") @@ -51,9 +151,7 @@ def test_connection_opensearch_1_0(domain_endpoint_opensearch_1_0): def test_connection_elasticsearch_7_10_fgac(domain_endpoint_elasticsearch_7_10_fgac, opensearch_password): client = wr.opensearch.connect( - host=domain_endpoint_elasticsearch_7_10_fgac, - fgac_user='test', - fgac_password=opensearch_password + host=domain_endpoint_elasticsearch_7_10_fgac, fgac_user="test", fgac_password=opensearch_password ) print(client.info()) assert len(client.info()) > 0 @@ -68,243 +166,173 @@ def opensearch_1_0_client(domain_endpoint_opensearch_1_0): @pytest.fixture(scope="session") def elasticsearch_7_10_fgac_client(domain_endpoint_elasticsearch_7_10_fgac, opensearch_password): client = wr.opensearch.connect( - host=domain_endpoint_elasticsearch_7_10_fgac, - fgac_user='test', - fgac_password=opensearch_password + host=domain_endpoint_elasticsearch_7_10_fgac, fgac_user="test", fgac_password=opensearch_password ) return client + # testing multiple versions -@pytest.fixture(params=['opensearch_1_0_client', 'elasticsearch_7_10_fgac_client']) +@pytest.fixture(params=["opensearch_1_0_client", "elasticsearch_7_10_fgac_client"]) def client(request): return request.getfixturevalue(request.param) def test_create_index(client): - index = 'test_create_index' + index = "test_create_index" wr.opensearch.delete_index(client, index) response = wr.opensearch.create_index( client=client, index=index, - mappings={ - 'properties': { - 'name': {'type': 'text'}, - 'age': {'type': 'integer'} - } - }, - settings={ - 'index': { - 'number_of_shards': 1, - 'number_of_replicas': 1 - } - } + mappings={"properties": {"name": {"type": "text"}, "age": {"type": "integer"}}}, + settings={"index": {"number_of_shards": 1, "number_of_replicas": 1}}, ) - assert response.get('acknowledged', False) is True + assert response.get("acknowledged", False) is True def test_delete_index(client): - index = 'test_delete_index' - wr.opensearch.create_index( - client, - index=index - ) - response = wr.opensearch.delete_index( - client, - index=index - ) + index = "test_delete_index" + wr.opensearch.create_index(client, index=index) + response = wr.opensearch.delete_index(client, index=index) print(response) - assert response.get('acknowledged', False) is True + assert response.get("acknowledged", False) is True def test_index_df(client): - response = wr.opensearch.index_df(client, - df=pd.DataFrame([{'_id': '1', 'name': 'John'}, - {'_id': '2', 'name': 'George'}, - {'_id': '3', 'name': 'Julia'} - ]), - index='test_index_df1' - ) + response = wr.opensearch.index_df( + client, + df=pd.DataFrame([{"_id": "1", "name": "John"}, {"_id": "2", "name": "George"}, {"_id": "3", "name": "Julia"}]), + index="test_index_df1", + ) print(response) def test_index_documents(client): - response = wr.opensearch.index_documents(client, - documents=[{'_id': '1', 'name': 'John'}, - {'_id': '2', 'name': 'George'}, - {'_id': '3', 'name': 'Julia'} - ], - index='test_index_documents1' - ) + response = wr.opensearch.index_documents( + client, + documents=[{"_id": "1", "name": "John"}, {"_id": "2", "name": "George"}, {"_id": "3", "name": "Julia"}], + index="test_index_documents1", + ) print(response) def 
test_index_documents_id_keys(client): - response = wr.opensearch.index_documents(client, - documents=inspections_documents, - index='test_index_documents_id_keys', - id_keys=['inspection_id'] - ) + response = wr.opensearch.index_documents( + client, documents=inspections_documents, index="test_index_documents_id_keys", id_keys=["inspection_id"] + ) print(response) def test_index_documents_no_id_keys(client): - response = wr.opensearch.index_documents(client, - documents=inspections_documents, - index='test_index_documents_no_id_keys' - ) + response = wr.opensearch.index_documents( + client, documents=inspections_documents, index="test_index_documents_no_id_keys" + ) print(response) def test_search(client): - index = 'test_search' - response = wr.opensearch.index_documents(client, - documents=inspections_documents, - index=index, - id_keys=['inspection_id'], - refresh='wait_for' - ) + index = "test_search" + response = wr.opensearch.index_documents( + client, documents=inspections_documents, index=index, id_keys=["inspection_id"], refresh="wait_for" + ) df = wr.opensearch.search( client, index=index, - search_body={ - "query": { - "match": { - "business_name": "soup" - } - } - }, - _source=['inspection_id', 'business_name', 'business_location'] + search_body={"query": {"match": {"business_name": "soup"}}}, + _source=["inspection_id", "business_name", "business_location"], ) - print('') + print("") print(df.to_string()) assert df.shape[0] == 3 def test_search_filter_path(client): - index = 'test_search' - response = wr.opensearch.index_documents(client, - documents=inspections_documents, - index=index, - id_keys=['inspection_id'], - refresh='wait_for' - ) + index = "test_search" + response = wr.opensearch.index_documents( + client, documents=inspections_documents, index=index, id_keys=["inspection_id"], refresh="wait_for" + ) df = wr.opensearch.search( client, index=index, - search_body={ - "query": { - "match": { - "business_name": "soup" - } - } - }, - _source=['inspection_id', 'business_name', 'business_location'], - filter_path=['hits.hits._source'] + search_body={"query": {"match": {"business_name": "soup"}}}, + _source=["inspection_id", "business_name", "business_location"], + filter_path=["hits.hits._source"], ) - print('') + print("") print(df.to_string()) assert df.shape[0] == 3 def test_search_scroll(client): - index = 'test_search_scroll' - response = wr.opensearch.index_documents(client, - documents=inspections_documents, - index=index, - id_keys=['inspection_id'], - refresh='wait_for' - ) + index = "test_search_scroll" + response = wr.opensearch.index_documents( + client, documents=inspections_documents, index=index, id_keys=["inspection_id"], refresh="wait_for" + ) df = wr.opensearch.search( - client, - index=index, - is_scroll=True, - _source=['inspection_id', 'business_name', 'business_location'] + client, index=index, is_scroll=True, _source=["inspection_id", "business_name", "business_location"] ) - print('') + print("") print(df.to_string()) assert df.shape[0] == 5 def test_search_sql(client): - index = 'test_search_sql' - response = wr.opensearch.index_documents(client, - documents=inspections_documents, - index=index, - id_keys=['inspection_id'], - refresh='wait_for' - ) - df = wr.opensearch.search_by_sql( - client, - sql_query=f'select * from {index}' + index = "test_search_sql" + response = wr.opensearch.index_documents( + client, documents=inspections_documents, index=index, id_keys=["inspection_id"], refresh="wait_for" ) + df = wr.opensearch.search_by_sql(client, 
sql_query=f"select * from {index}") - print('') + print("") print(df.to_string()) assert df.shape[0] == 5 def test_index_json_local(client): - file_path = f'{tempfile.gettempdir()}/inspections.json' - with open(file_path, 'w') as filehandle: + file_path = f"{tempfile.gettempdir()}/inspections.json" + with open(file_path, "w") as filehandle: for doc in inspections_documents: - filehandle.write('%s\n' % json.dumps(doc)) - response = wr.opensearch.index_json( - client, - index='test_index_json_local', - path=file_path - ) + filehandle.write("%s\n" % json.dumps(doc)) + response = wr.opensearch.index_json(client, index="test_index_json_local", path=file_path) print(response) - assert response.get('success', 0) == 6 + assert response.get("success", 0) == 6 def test_index_json_s3(client, path): - file_path = f'{tempfile.gettempdir()}/inspections.json' - with open(file_path, 'w') as filehandle: + file_path = f"{tempfile.gettempdir()}/inspections.json" + with open(file_path, "w") as filehandle: for doc in inspections_documents: - filehandle.write('%s\n' % json.dumps(doc)) - s3 = boto3.client('s3') + filehandle.write("%s\n" % json.dumps(doc)) + s3 = boto3.client("s3") path = f"{path}opensearch/inspections.json" bucket, key = wr._utils.parse_path(path) s3.upload_file(file_path, bucket, key) - response = wr.opensearch.index_json( - client, - index='test_index_json_s3', - path=path - ) + response = wr.opensearch.index_json(client, index="test_index_json_s3", path=path) print(response) - assert response.get('success', 0) == 6 + assert response.get("success", 0) == 6 def test_index_csv_local(client): - file_path = f'{tempfile.gettempdir()}/inspections.csv' - index = 'test_index_csv_local' - df=pd.DataFrame(inspections_documents) + file_path = f"{tempfile.gettempdir()}/inspections.csv" + index = "test_index_csv_local" + df = pd.DataFrame(inspections_documents) df.to_csv(file_path, index=False) - response = wr.opensearch.index_csv( - client, - path=file_path, - index=index - ) + response = wr.opensearch.index_csv(client, path=file_path, index=index) print(response) - assert response.get('success', 0) == 6 + assert response.get("success", 0) == 6 def test_index_csv_s3(client, path): - file_path = f'{tempfile.gettempdir()}/inspections.csv' - index = 'test_index_csv_s3' - df=pd.DataFrame(inspections_documents) + file_path = f"{tempfile.gettempdir()}/inspections.csv" + index = "test_index_csv_s3" + df = pd.DataFrame(inspections_documents) df.to_csv(file_path, index=False) - s3 = boto3.client('s3') + s3 = boto3.client("s3") path = f"{path}opensearch/inspections.csv" bucket, key = wr._utils.parse_path(path) s3.upload_file(file_path, bucket, key) - response = wr.opensearch.index_csv( - client, - path=path, - index=index - ) + response = wr.opensearch.index_csv(client, path=path, index=index) print(response) - assert response.get('success', 0) == 6 + assert response.get("success", 0) == 6 From 950231d4bf88a9550e97549a9bd2bc5b04f47f4a Mon Sep 17 00:00:00 2001 From: Muralidhar Reddy Date: Tue, 28 Sep 2021 19:10:01 +0530 Subject: [PATCH 20/41] Added OpenSearch tutorial --- README.md | 1 + tutorials/031 - OpenSearch.ipynb | 235 +++++++++++++++++++++++++++++++ 2 files changed, 236 insertions(+) create mode 100644 tutorials/031 - OpenSearch.ipynb diff --git a/README.md b/README.md index 16ab96390..bed91146f 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,7 @@ FROM "sampleDB"."sampleTable" ORDER BY time DESC LIMIT 3 - [026 - Amazon 
Timestream](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/026%20-%20Amazon%20Timestream.ipynb) - [027 - Amazon Timestream 2](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/027%20-%20Amazon%20Timestream%202.ipynb) - [028 - Amazon DynamoDB](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/028%20-%20DynamoDB.ipynb) + - [031 - OpenSearch](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/031%20-%20OpenSearch.ipynb) - [**API Reference**](https://aws-data-wrangler.readthedocs.io/en/2.11.0/api.html) - [Amazon S3](https://aws-data-wrangler.readthedocs.io/en/2.11.0/api.html#amazon-s3) - [AWS Glue Catalog](https://aws-data-wrangler.readthedocs.io/en/2.11.0/api.html#aws-glue-catalog) diff --git a/tutorials/031 - OpenSearch.ipynb b/tutorials/031 - OpenSearch.ipynb new file mode 100644 index 000000000..cb5cbbbf4 --- /dev/null +++ b/tutorials/031 - OpenSearch.ipynb @@ -0,0 +1,235 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![AWS Data Wrangler](_static/logo.png \"AWS Data Wrangler\")](https://github.com/awslabs/aws-data-wrangler)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 31 - OpenSearch" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Table of Contents\n", + "* [1. Create Indices](#1.-Create-Indices)\n", + "* [2. Write Indices](#2.-Write-Indices)\n", + "\t* [2.1 Write from JSON files](#2.1-Write-from-JSON-files)\n", + "\t* [2.2 Write from CSV files](#2.2-Write-from-CSV-files)\n", + "* [3. Search Indices](#3.-Search-Indices)\n", + "\t* [3.1 Search by DSL](#3.1-Search-by-DSL)\n", + "\t* [3.2 Search by SQL](#3.2-Search-by-SQL)\n", + "* [4. Delete Index](#4.-Delete-Index)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import awswrangler as wr\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Enter your domain endpoint:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Create Indices" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "response = wr.opensearch.create_index(\n", + " client=client,\n", + " index=\"tutorials\",\n", + " mappings={\n", + " \"properties\": {\n", + " \"id\": { \"type\" : \"integer\" },\n", + " \"id\": { \"type\" : \"string\" },\n", + " }\n", + " },\n", + " settings={\n", + " \"index\": {\n", + " \"number_of_shards\": 2\n", + " \"number_of_replicas\": 1\n", + " }\n", + " }\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
Write Indices" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.1 Write from JSON files" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "wr.opensearch.index_json(\n", + " client=client,\n", + " path='s3://awswrangler-opensearch/dataload/doc1.json',\n", + " index='tutorials'\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.2 Write from CSV files" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "wr.opensearch.index_csv(\n", + " client=client,\n", + " path='s3://awswrangler-opensearch/dataload/doc1.csv',\n", + " index='tutorials'\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Search Indices" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 3.1 Search by DSL" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "df = wr.opensearch.search(\n", + " client=client,\n", + " index='tutorials',\n", + " search_body={\n", + " \"query\": {\n", + " \"match_all\": {\n", + " }\n", + " }\n", + " }\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 3.2 Search by SQL" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "df = wr.opensearch.search_by_sql(\n", + " client=client,\n", + " sql_query='SELECT * FROM tutorials LIMIT 50'\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Delete Index" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "response = wr.opensearch.delete_index(\n", + " client=client,\n", + " index=\"tutorials\",\n", + " )" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 982975505167c7a8efe42ffa3e79515d7cb3d94e Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Tue, 28 Sep 2021 14:49:00 -0400 Subject: [PATCH 21/41] typing fixes --- awswrangler/opensearch/_read.py | 22 +++++------- awswrangler/opensearch/_utils.py | 19 ++++++----- awswrangler/opensearch/_write.py | 49 ++++++++++++++------------- test_infra/stacks/opensearch_stack.py | 20 +++-------- 4 files changed, 50 insertions(+), 60 deletions(-) diff --git a/awswrangler/opensearch/_read.py b/awswrangler/opensearch/_read.py index 33a2a9b2c..cb918a74d 100644 --- a/awswrangler/opensearch/_read.py +++ b/awswrangler/opensearch/_read.py @@ -1,6 +1,6 @@ """Amazon OpenSearch Read Module (PRIVATE).""" -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Mapping, List, Union import pandas as pd from elasticsearch import Elasticsearch @@ -9,7 +9,7 @@ from awswrangler.opensearch._utils import _get_distribution -def _resolve_fields(row): +def _resolve_fields(row: Mapping[str, Any]) -> Mapping[str, Any]: fields = {} for field in row: if isinstance(row[field], dict): @@ -21,8 +21,8 @@ def 
_resolve_fields(row): return fields -def _hit_to_row(hit): - row = {} +def _hit_to_row(hit: Mapping[str, Any]) -> Mapping[str, Any]: + row: Dict[str, Any] = {} for k in hit.keys(): if k == "_source": solved_fields = _resolve_fields(hit["_source"]) @@ -32,11 +32,11 @@ def _hit_to_row(hit): return row -def _search_response_to_documents(response: dict): +def _search_response_to_documents(response: Mapping[str, Any]) -> List[Mapping[str, Any]]: return [_hit_to_row(hit) for hit in response["hits"]["hits"]] -def _search_response_to_df(response: dict): +def _search_response_to_df(response: Union[Mapping[str, Any], Any]) -> pd.DataFrame: return pd.DataFrame(_search_response_to_documents(response)) @@ -46,7 +46,7 @@ def search( search_body: Optional[Dict[str, Any]] = None, doc_type: Optional[str] = None, is_scroll: Optional[bool] = False, - **kwargs, + **kwargs: Any, ) -> pd.DataFrame: """Returns results matching query DSL as pandas dataframe. @@ -107,7 +107,7 @@ def search( return df -def search_by_sql(client: Elasticsearch, sql_query: str, **kwargs) -> pd.DataFrame: +def search_by_sql(client: Elasticsearch, sql_query: str, **kwargs: Any) -> pd.DataFrame: """Returns results matching [SQL query](https://opensearch.org/docs/search-plugins/sql/index/) as pandas dataframe Parameters @@ -138,12 +138,6 @@ def search_by_sql(client: Elasticsearch, sql_query: str, **kwargs) -> pd.DataFra """ - # can be used if not passing format - def _sql_response_to_docs(response: Dict[str, Any]): - header = list(map(lambda x: x["name"], response.get("schema", []))) - for datarow in response.get("datarows", []): - yield dict(zip(header, datarow)) - if _get_distribution(client) == "opensearch": url = "/_plugins/_sql" else: diff --git a/awswrangler/opensearch/_utils.py b/awswrangler/opensearch/_utils.py index f15f6105c..04b40b422 100644 --- a/awswrangler/opensearch/_utils.py +++ b/awswrangler/opensearch/_utils.py @@ -1,7 +1,7 @@ """Amazon OpenSearch Utils Module (PRIVATE).""" import logging -from typing import Optional +from typing import Optional, Any import boto3 from elasticsearch import Elasticsearch, RequestsHttpConnection @@ -10,15 +10,15 @@ _logger: logging.Logger = logging.getLogger(__name__) -def _get_distribution(client: Elasticsearch) -> str: +def _get_distribution(client: Elasticsearch) -> Any: return client.info().get("version", {}).get("distribution", "elasticsearch") -def _get_version(client: Elasticsearch): +def _get_version(client: Elasticsearch) -> Any: return client.info().get("version", {}).get("number") -def _get_version_major(client: Elasticsearch): +def _get_version_major(client: Elasticsearch) -> Any: version = _get_version(client) if version: return int(version.split(".")[0]) @@ -78,10 +78,13 @@ def connect( if fgac_user and fgac_password: http_auth = (fgac_user, fgac_password) else: - if region is None: - region = boto3_session.region_name - creds = boto3_session.get_credentials() - http_auth = AWS4Auth(creds.access_key, creds.secret_key, region, "es", creds.token) + if boto3_session is None: + raise ValueError('Please provide either boto3_session or fgac_user+fgac_password') + else: + if region is None: + region = boto3_session.region_name + creds = boto3_session.get_credentials() + http_auth = AWS4Auth(creds.access_key, creds.secret_key, region, "es", creds.token) try: es = Elasticsearch( host=host, diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index ace119b02..1781473d3 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ 
-5,7 +5,7 @@ import logging import uuid from pathlib import Path -from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, Union +from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, Union, Generator import boto3 import pandas as pd @@ -19,10 +19,10 @@ _logger: logging.Logger = logging.getLogger(__name__) -def _selected_keys(document: Dict, keys_to_write: Optional[List[str]]): +def _selected_keys(document: Mapping[str, Any], keys_to_write: Optional[List[str]]) -> Mapping[str, Any]: if keys_to_write is None: - keys_to_write = document.keys() - keys_to_write = filter(lambda x: x != "_id", keys_to_write) + keys_to_write = list(document.keys()) + keys_to_write = list(filter(lambda x: x != "_id", keys_to_write)) return {key: document[key] for key in keys_to_write} @@ -32,7 +32,7 @@ def _actions_generator( doc_type: Optional[str], keys_to_write: Optional[List[str]], id_keys: Optional[List[str]], -): +) -> Generator[Dict[str, Any], None, None]: for document in documents: if id_keys: _id = "-".join(list(map(lambda x: str(document[x]), id_keys))) @@ -46,8 +46,8 @@ def _actions_generator( } -def _df_doc_generator(df: pd.DataFrame): - def _deserialize(v): +def _df_doc_generator(df: pd.DataFrame) -> Generator[Dict[str, Any], None, None]: + def _deserialize(v: Any) -> Any: if isinstance(v, str): v = v.strip() if v.startswith("{") and v.endswith("}") or v.startswith("[") and v.endswith("]"): @@ -66,7 +66,7 @@ def _deserialize(v): yield {k: _deserialize(v) for k, v in document.items() if notna(v)} -def _file_line_generator(path: str, is_json: bool = False): +def _file_line_generator(path: str, is_json: bool = False) -> Generator[Any, None, None]: with open(path) as fp: for line in fp: if is_json: @@ -141,10 +141,10 @@ def create_index( if settings: body["settings"] = settings if body == {}: - body = None + body = None # type: ignore # ignore 400 cause by IndexAlreadyExistsException when creating an index - response = client.indices.create(index, body, ignore=400) + response: Dict[str, Any] = client.indices.create(index, body=body, ignore=400) if "error" in response: _logger.warning(response) if str(response["error"]).startswith("MapperParsingException"): @@ -181,7 +181,7 @@ def delete_index(client: Elasticsearch, index: str) -> Dict[str, Any]: """ # ignore 400/404 IndexNotFoundError exception - response = client.indices.delete(index, ignore=[400, 404]) + response: Dict[str, Any] = client.indices.delete(index, ignore=[400, 404]) if "error" in response: _logger.warning(response) return response @@ -189,11 +189,11 @@ def delete_index(client: Elasticsearch, index: str) -> Dict[str, Any]: def index_json( client: Elasticsearch, - path: Union[str, Path], + path: str, index: str, doc_type: Optional[str] = None, boto3_session: Optional[boto3.Session] = boto3.Session(), - **kwargs, + **kwargs: Any, ) -> Dict[str, Any]: """Index all documents from JSON file to OpenSearch index. @@ -203,7 +203,7 @@ def index_json( ---------- client : Elasticsearch instance of elasticsearch.Elasticsearch to use. - path : Union[str, Path] + path : str s3 or local path to the JSON file which contains the documents. index : str Name of the index. 
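
Aside: the JSON-Lines contract described in the `index_json` docstring above can be exercised with a short sketch like the one below. This is a minimal illustration, not part of the patch; the endpoint, file path, and index name are placeholders.

```python
import awswrangler as wr

# Placeholder endpoint; substitute a real Amazon OpenSearch domain.
client = wr.opensearch.connect(host="my-domain.us-east-1.es.amazonaws.com")

# inspections.jsonl holds one JSON document per line (JSON-Lines), e.g.:
# {"inspection_id": "1", "business_name": "Soup House"}
# {"inspection_id": "2", "business_name": "Salad Bar"}
response = wr.opensearch.index_json(
    client,
    path="/tmp/inspections.jsonl",  # a local path or an s3:// URI
    index="inspections",
)
print(response)  # a dict of the form {"success": <count>, "errors": [...]}
```
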
@@ -236,25 +236,28 @@ def index_json( """ # Loading data from file + if boto3_session is None: + raise ValueError('boto3_session cannot be None') + if path.startswith("s3://"): bucket, key = parse_path(path) s3 = boto3_session.client("s3") obj = s3.get_object(Bucket=bucket, Key=key) body = obj["Body"].read() lines = body.splitlines() - documents = map(lambda x: json.loads(x), lines) + documents = list(map(lambda x: json.loads(x), lines)) # type: ignore else: # local path - documents = _file_line_generator(path, is_json=True) + documents = list(_file_line_generator(path, is_json=True)) return index_documents(client=client, documents=documents, index=index, doc_type=doc_type, **kwargs) def index_csv( client: Elasticsearch, - path: Union[str, Path], + path: str, index: str, doc_type: Optional[str] = None, - pandas_kwargs: Optional[Dict[str, Any]] = {}, - **kwargs, + pandas_kwargs: Dict[str, Any] = {}, + **kwargs: Any, ) -> Dict[str, Any]: """Index all documents from a CSV file to OpenSearch index. @@ -262,13 +265,13 @@ def index_csv( ---------- client : Elasticsearch instance of elasticsearch.Elasticsearch to use. - path : Union[str, Path] + path : str s3 or local path to the CSV file which contains the documents. index : str Name of the index. doc_type : str, optional Name of the document type (only for Elasticsearch versions 5.x and older). - pandas_kwargs : + pandas_kwargs : Dict[str, Any], optional Dictionary of arguments forwarded to pandas.read_csv(). e.g. pandas_kwargs={'sep': '|', 'na_values': ['null', 'none']} https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html @@ -317,7 +320,7 @@ def index_csv( def index_df( - client: Elasticsearch, df: pd.DataFrame, index: str, doc_type: Optional[str] = None, **kwargs + client: Elasticsearch, df: pd.DataFrame, index: str, doc_type: Optional[str] = None, **kwargs: Any ) -> Dict[str, Any]: """Index all documents from a DataFrame to OpenSearch index. @@ -371,7 +374,7 @@ def index_documents( max_retries: Optional[int] = 0, initial_backoff: Optional[int] = 2, max_backoff: Optional[int] = 600, - **kwargs, + **kwargs: Any, ) -> Dict[str, Any]: """Index all documents to OpenSearch index. 
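
For context, a minimal sketch of the DataFrame entry point whose signatures are annotated above; the endpoint and data are illustrative placeholders, assuming a reachable domain.

```python
import pandas as pd

import awswrangler as wr

# Placeholder endpoint.
client = wr.opensearch.connect(host="my-domain.us-east-1.es.amazonaws.com")

df = pd.DataFrame(
    [
        {"_id": "1", "name": "John", "age": 32},
        {"_id": "2", "name": "George", "age": 27},
    ]
)

# index_df feeds each row through _df_doc_generator (NaN values are dropped,
# JSON-like strings are deserialized) and delegates to index_documents, which
# wraps elasticsearch.helpers.bulk; an explicit `_id` column becomes the
# document id.
response = wr.opensearch.index_df(client, df=df, index="sample-index1")
print(response.get("success", 0))  # expected: 2
```
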
diff --git a/test_infra/stacks/opensearch_stack.py b/test_infra/stacks/opensearch_stack.py index d5f6d1c67..f3bc6a1f8 100644 --- a/test_infra/stacks/opensearch_stack.py +++ b/test_infra/stacks/opensearch_stack.py @@ -60,9 +60,7 @@ def _setup_opensearch_1_0(self) -> None: domain_name, domain_name=domain_name, version=opensearch.EngineVersion.OPENSEARCH_1_0, - capacity=opensearch.CapacityConfig( - data_node_instance_type="t3.small.search", data_nodes=1 - ), + capacity=opensearch.CapacityConfig(data_node_instance_type="t3.small.search", data_nodes=1), access_policies=[ iam.PolicyStatement( effect=iam.Effect.ALLOW, @@ -74,9 +72,7 @@ def _setup_opensearch_1_0(self) -> None: removal_policy=cdk.RemovalPolicy.DESTROY, ) - cdk.CfnOutput( - self, f"DomainEndpoint-{domain_name}", value=domain.domain_endpoint - ) + cdk.CfnOutput(self, f"DomainEndpoint-{domain_name}", value=domain.domain_endpoint) def _setup_elasticsearch_7_10_fgac(self) -> None: domain_name = "wrangler-es-7-10-fgac" @@ -87,9 +83,7 @@ def _setup_elasticsearch_7_10_fgac(self) -> None: domain_name, domain_name=domain_name, version=opensearch.EngineVersion.ELASTICSEARCH_7_10, - capacity=opensearch.CapacityConfig( - data_node_instance_type="t3.small.search", data_nodes=1 - ), + capacity=opensearch.CapacityConfig(data_node_instance_type="t3.small.search", data_nodes=1), access_policies=[ iam.PolicyStatement( effect=iam.Effect.ALLOW, @@ -103,13 +97,9 @@ def _setup_elasticsearch_7_10_fgac(self) -> None: master_user_password=self.password_secret, ), node_to_node_encryption=True, - encryption_at_rest=opensearch.EncryptionAtRestOptions( - enabled=True, kms_key=self.key - ), + encryption_at_rest=opensearch.EncryptionAtRestOptions(enabled=True, kms_key=self.key), enforce_https=True, removal_policy=cdk.RemovalPolicy.DESTROY, ) - cdk.CfnOutput( - self, f"DomainEndpoint-{domain_name}", value=domain.domain_endpoint - ) + cdk.CfnOutput(self, f"DomainEndpoint-{domain_name}", value=domain.domain_endpoint) From 0120e31865641810bf09db41a90b42ecdc5e5fc9 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Tue, 28 Sep 2021 14:51:55 -0400 Subject: [PATCH 22/41] [skip ci] isort --- awswrangler/opensearch/_read.py | 2 +- awswrangler/opensearch/_utils.py | 2 +- awswrangler/opensearch/_write.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/awswrangler/opensearch/_read.py b/awswrangler/opensearch/_read.py index cb918a74d..49046830b 100644 --- a/awswrangler/opensearch/_read.py +++ b/awswrangler/opensearch/_read.py @@ -1,6 +1,6 @@ """Amazon OpenSearch Read Module (PRIVATE).""" -from typing import Any, Dict, Optional, Mapping, List, Union +from typing import Any, Dict, List, Mapping, Optional, Union import pandas as pd from elasticsearch import Elasticsearch diff --git a/awswrangler/opensearch/_utils.py b/awswrangler/opensearch/_utils.py index 04b40b422..283531601 100644 --- a/awswrangler/opensearch/_utils.py +++ b/awswrangler/opensearch/_utils.py @@ -1,7 +1,7 @@ """Amazon OpenSearch Utils Module (PRIVATE).""" import logging -from typing import Optional, Any +from typing import Any, Optional import boto3 from elasticsearch import Elasticsearch, RequestsHttpConnection diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index 1781473d3..8ece6f6a9 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -5,7 +5,7 @@ import logging import uuid from pathlib import Path -from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, Union, Generator +from typing import Any, Dict, 
Generator, Iterable, List, Mapping, Optional, Tuple, Union import boto3 import pandas as pd From b4700f6a4cab2d73be9ac8a3ba5d6ede36ec7f89 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Tue, 28 Sep 2021 14:52:36 -0400 Subject: [PATCH 23/41] [skip ci] black opensearch --- awswrangler/opensearch/_utils.py | 2 +- awswrangler/opensearch/_write.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/awswrangler/opensearch/_utils.py b/awswrangler/opensearch/_utils.py index 283531601..a22f68339 100644 --- a/awswrangler/opensearch/_utils.py +++ b/awswrangler/opensearch/_utils.py @@ -79,7 +79,7 @@ def connect( http_auth = (fgac_user, fgac_password) else: if boto3_session is None: - raise ValueError('Please provide either boto3_session or fgac_user+fgac_password') + raise ValueError("Please provide either boto3_session or fgac_user+fgac_password") else: if region is None: region = boto3_session.region_name diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index 8ece6f6a9..671809058 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -237,7 +237,7 @@ def index_json( # Loading data from file if boto3_session is None: - raise ValueError('boto3_session cannot be None') + raise ValueError("boto3_session cannot be None") if path.startswith("s3://"): bucket, key = parse_path(path) From 51b81103c3b7293fd6e65774de3f9ac435d2a40b Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Tue, 28 Sep 2021 17:08:34 -0400 Subject: [PATCH 24/41] [skip ci] opensearch validation --- awswrangler/opensearch/_read.py | 19 +++++++++++-------- awswrangler/opensearch/_utils.py | 16 ++++++++-------- awswrangler/opensearch/_write.py | 32 +++++++++++++++++--------------- tests/test_opensearch.py | 8 ++++---- 4 files changed, 40 insertions(+), 35 deletions(-) diff --git a/awswrangler/opensearch/_read.py b/awswrangler/opensearch/_read.py index 49046830b..5afe44d80 100644 --- a/awswrangler/opensearch/_read.py +++ b/awswrangler/opensearch/_read.py @@ -48,7 +48,7 @@ def search( is_scroll: Optional[bool] = False, **kwargs: Any, ) -> pd.DataFrame: - """Returns results matching query DSL as pandas dataframe. + """Return results matching query DSL as pandas dataframe. Parameters ---------- @@ -62,12 +62,16 @@ def search( doc_type : str, optional Name of the document type (for Elasticsearch versions 5.x and earlier). is_scroll : bool, optional - Allows to retrieve a large numbers of results from a single search request using [scroll](https://opensearch.org/docs/opensearch/rest-api/scroll/) + Allows retrieving a large number of results from a single search request using + [scroll](https://opensearch.org/docs/opensearch/rest-api/scroll/), + for example, for machine learning jobs. - Because scroll search contexts consume a lot of memory, we suggest you don’t use the scroll operation for frequent user queries. + Because scroll search contexts consume a lot of memory, we suggest you don’t use the scroll operation + for frequent user queries. 
**kwargs : - KEYWORD arguments forwarded to [elasticsearch.Elasticsearch.search](https://elasticsearch-py.readthedocs.io/en/v7.13.4/api.html#elasticsearch.Elasticsearch.search) - and also to [elasticsearch.helpers.scan](https://elasticsearch-py.readthedocs.io/en/master/helpers.html#scan) if `is_scroll=True` + KEYWORD arguments forwarded to [elasticsearch.Elasticsearch.search]\ +(https://elasticsearch-py.readthedocs.io/en/v7.13.4/api.html#elasticsearch.Elasticsearch.search) + and also to [elasticsearch.helpers.scan](https://elasticsearch-py.readthedocs.io/en/master/helpers.html#scan) + if `is_scroll=True` Returns ------- @@ -99,7 +103,7 @@ def search( if is_scroll: documents_generator = scan(client, index=index, query=search_body, **kwargs) - documents = map(lambda x: _hit_to_row(x), documents_generator) + documents = [_hit_to_row(doc) for doc in documents_generator] df = pd.DataFrame(documents) else: response = client.search(index=index, body=search_body, **kwargs) @@ -108,7 +112,7 @@ def search( def search_by_sql(client: Elasticsearch, sql_query: str, **kwargs: Any) -> pd.DataFrame: - """Returns results matching [SQL query](https://opensearch.org/docs/search-plugins/sql/index/) as pandas dataframe + """Return results matching [SQL query](https://opensearch.org/docs/search-plugins/sql/index/) as pandas dataframe. Parameters ---------- @@ -137,7 +141,6 @@ def search_by_sql(client: Elasticsearch, sql_query: str, **kwargs: Any) -> pd.Da """ - if _get_distribution(client) == "opensearch": url = "/_plugins/_sql" else: diff --git a/awswrangler/opensearch/_utils.py b/awswrangler/opensearch/_utils.py index a22f68339..4d05c41f6 100644 --- a/awswrangler/opensearch/_utils.py +++ b/awswrangler/opensearch/_utils.py @@ -33,7 +33,7 @@ def connect( fgac_user: Optional[str] = None, fgac_password: Optional[str] = None, ) -> Elasticsearch: - """Creates a secure connection to the specified Amazon OpenSearch domain. + """Create a secure connection to the specified Amazon OpenSearch domain. Note ---- @@ -45,7 +45,8 @@ def connect( https://aws.amazon.com/blogs/opensource/keeping-clients-of-opensearch-and-elasticsearch-compatible-with-open-source/ https://opensearch.org/docs/clients/index/ - The username and password are mandatory if the OS Cluster uses [Fine Grained Access Control](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/fgac.html). + The username and password are mandatory if the OS Cluster uses [Fine Grained Access Control]\ +(https://docs.aws.amazon.com/opensearch-service/latest/developerguide/fgac.html). If fine grained access control is disabled, session access key and secret keys are used. Parameters @@ -69,7 +70,6 @@ def connect( Elasticsearch low-level client. 
https://elasticsearch-py.readthedocs.io/en/v7.13.4/api.html#elasticsearch """ - valid_ports = {80, 443} if port not in valid_ports: @@ -80,11 +80,11 @@ def connect( else: if boto3_session is None: raise ValueError("Please provide either boto3_session or fgac_user+fgac_password") - else: - if region is None: - region = boto3_session.region_name - creds = boto3_session.get_credentials() - http_auth = AWS4Auth(creds.access_key, creds.secret_key, region, "es", creds.token) + # else: + if region is None: + region = boto3_session.region_name + creds = boto3_session.get_credentials() + http_auth = AWS4Auth(creds.access_key, creds.secret_key, region, "es", creds.token) try: es = Elasticsearch( host=host, diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index 671809058..3f65974ce 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -4,7 +4,6 @@ import json import logging import uuid -from pathlib import Path from typing import Any, Dict, Generator, Iterable, List, Mapping, Optional, Tuple, Union import boto3 import pandas as pd @@ -35,7 +34,7 @@ def _actions_generator( ) -> Generator[Dict[str, Any], None, None]: for document in documents: if id_keys: - _id = "-".join(list(map(lambda x: str(document[x]), id_keys))) + _id = "-".join([document[id_key] for id_key in id_keys]) else: _id = document.get("_id", uuid.uuid4()) yield { @@ -53,16 +52,18 @@ def _deserialize(v: Any) -> Any: if v.startswith("{") and v.endswith("}") or v.startswith("[") and v.endswith("]"): try: v = json.loads(v) - except Exception as e: + except json.decoder.JSONDecodeError: try: v = ast.literal_eval(v) # if properties are enclosed with single quotes - except: - _logger.warning(f"could not convert string to json: {v}") + if not isinstance(v, dict): + _logger.warning("could not convert string to json: %s", v) + except SyntaxError as e: + _logger.warning("could not convert string to json: %s", v) + _logger.warning(e) return v df_iter = df.iterrows() - for i, document in df_iter: + for _, document in df_iter: yield {k: _deserialize(v) for k, v in document.items() if notna(v)} @@ -82,7 +83,7 @@ def create_index( settings: Optional[Dict[str, Any]] = None, mappings: Optional[Dict[str, Any]] = None, ) -> Dict[str, Any]: - """Creates an index. + """Create an index. Parameters ---------- @@ -128,7 +129,6 @@ def create_index( ... ) """ - body = {} if mappings: if _get_distribution(client) == "opensearch" or _get_version_major(client) >= 7: @@ -153,7 +153,7 @@ def create_index( def delete_index(client: Elasticsearch, index: str) -> Dict[str, Any]: - """Creates an index. + """Delete an index. Parameters ---------- @@ -179,7 +179,6 @@ def delete_index(client: Elasticsearch, index: str) -> Dict[str, Any]: ... 
) """ - # ignore 400/404 IndexNotFoundError exception response: Dict[str, Any] = client.indices.delete(index, ignore=[400, 404]) if "error" in response: @@ -245,7 +244,7 @@ def index_json( obj = s3.get_object(Bucket=bucket, Key=key) body = obj["Body"].read() lines = body.splitlines() - documents = list(map(lambda x: json.loads(x), lines)) # type: ignore + documents = [json.loads(line) for line in lines] else: # local path documents = list(_file_line_generator(path, is_json=True)) return index_documents(client=client, documents=documents, index=index, doc_type=doc_type, **kwargs) @@ -256,7 +255,7 @@ def index_csv( path: str, index: str, doc_type: Optional[str] = None, - pandas_kwargs: Dict[str, Any] = {}, + pandas_kwargs: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> Dict[str, Any]: """Index all documents from a CSV file to OpenSearch index. @@ -309,6 +308,8 @@ def index_csv( ... pandas_kwargs={'sep': '|', 'na_values': ['null', 'none']} ... ) """ + if pandas_kwargs is None: + pandas_kwargs = {} enforced_pandas_params = { "skip_blank_lines": True, # 'na_filter': True # will generate Nan value for empty cells. We remove Nan keys in _df_doc_generator @@ -357,7 +358,6 @@ def index_df( ... index='sample-index1' ... ) """ - return index_documents(client=client, documents=_df_doc_generator(df), index=index, doc_type=doc_type, **kwargs) @@ -415,8 +415,10 @@ def index_documents( maximum number of seconds a retry will wait (default: 600) **kwargs : KEYWORD arguments forwarded to bulk operation - elasticsearch >= 7.10.2 / opensearch: https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#url-parameters - elasticsearch < 7.10.2: https://opendistro.github.io/for-elasticsearch-docs/docs/elasticsearch/rest-api-reference/#url-parameters + elasticsearch >= 7.10.2 / opensearch: \ +https://opensearch.org/docs/opensearch/rest-api/document-apis/bulk/#url-parameters + elasticsearch < 7.10.2: \ +https://opendistro.github.io/for-elasticsearch-docs/docs/elasticsearch/rest-api-reference/#url-parameters Returns ------- diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index 68a5f4f8b..df40e2042 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -231,7 +231,7 @@ def test_index_documents_no_id_keys(client): def test_search(client): index = "test_search" - response = wr.opensearch.index_documents( + wr.opensearch.index_documents( client, documents=inspections_documents, index=index, id_keys=["inspection_id"], refresh="wait_for" ) df = wr.opensearch.search( @@ -248,7 +248,7 @@ def test_search(client): def test_search_filter_path(client): index = "test_search" - response = wr.opensearch.index_documents( + wr.opensearch.index_documents( client, documents=inspections_documents, index=index, id_keys=["inspection_id"], refresh="wait_for" ) df = wr.opensearch.search( @@ -266,7 +266,7 @@ def test_search_filter_path(client): def test_search_scroll(client): index = "test_search_scroll" - response = wr.opensearch.index_documents( + wr.opensearch.index_documents( client, documents=inspections_documents, index=index, id_keys=["inspection_id"], refresh="wait_for" ) df = wr.opensearch.search( @@ -280,7 +280,7 @@ def test_search_scroll(client): def test_search_sql(client): index = "test_search_sql" - response = wr.opensearch.index_documents( + wr.opensearch.index_documents( client, documents=inspections_documents, index=index, id_keys=["inspection_id"], refresh="wait_for" ) df = wr.opensearch.search_by_sql(client, sql_query=f"select * from {index}") From 
39457fc1fc09d3ab09ace66ad36cb33a4bd6fcbf Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Tue, 28 Sep 2021 20:50:03 -0400 Subject: [PATCH 25/41] [skip ci] opensearch: poetry add requests-aws4auth and elasticsearch --- poetry.lock | 40 +++++++++++++++++++++++++++++++++++++++- pyproject.toml | 2 ++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index fab569724..c20cccf14 100644 --- a/poetry.lock +++ b/poetry.lock @@ -433,6 +433,24 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +[[package]] +name = "elasticsearch" +version = "7.13.4" +description = "Python client for Elasticsearch" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" + +[package.dependencies] +certifi = "*" +urllib3 = ">=1.21.1,<2" + +[package.extras] +async = ["aiohttp (>=3,<4)"] +develop = ["requests (>=2.0.0,<3.0.0)", "coverage", "mock", "pyyaml", "pytest", "pytest-cov", "sphinx (<1.7)", "sphinx-rtd-theme", "black", "jinja2"] +docs = ["sphinx (<1.7)", "sphinx-rtd-theme"] +requests = ["requests (>=2.4.0,<3.0.0)"] + [[package]] name = "entrypoints" version = "0.3" @@ -1617,6 +1635,18 @@ urllib3 = ">=1.21.1,<1.27" security = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)"] socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] +[[package]] +name = "requests-aws4auth" +version = "1.1.1" +description = "AWS4 authentication for Requests" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +requests = "*" +six = "*" + [[package]] name = "requests-unixsocket" version = "0.2.0" @@ -2113,7 +2143,7 @@ sqlserver = ["pyodbc"] [metadata] lock-version = "1.1" python-versions = ">=3.6.2, <3.10" -content-hash = "d944bf99e7c7b4406442e1fcbc7125036eafb9081a7ca7638e9b9d9c377bb943" +content-hash = "b273034ed005e309039619d35491a4c4f615b49334487db5aa982eada794901a" [metadata.files] aiobotocore = [ @@ -2423,6 +2453,10 @@ docutils = [ {file = "docutils-0.17.1-py2.py3-none-any.whl", hash = "sha256:cf316c8370a737a022b72b56874f6602acf974a37a9fba42ec2876387549fc61"}, {file = "docutils-0.17.1.tar.gz", hash = "sha256:686577d2e4c32380bb50cbb22f575ed742d58168cee37e99117a854bcd88f125"}, ] +elasticsearch = [ + {file = "elasticsearch-7.13.4-py2.py3-none-any.whl", hash = "sha256:5920df0ab2630778680376d86bea349dc99860977eec9b6d2bd0860f337313f2"}, + {file = "elasticsearch-7.13.4.tar.gz", hash = "sha256:52dda85f76eeb85ec873bf9ffe0ba6849e544e591f66d4048a5e48016de268e0"}, +] entrypoints = [ {file = "entrypoints-0.3-py2.py3-none-any.whl", hash = "sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19"}, {file = "entrypoints-0.3.tar.gz", hash = "sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451"}, @@ -3184,6 +3218,10 @@ requests = [ {file = "requests-2.25.1-py2.py3-none-any.whl", hash = "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"}, {file = "requests-2.25.1.tar.gz", hash = "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804"}, ] +requests-aws4auth = [ + {file = "requests-aws4auth-1.1.1.tar.gz", hash = "sha256:c0883346ce30b5018903a67da88df72f73ff06e1a320845bba9cd85e811ba0ba"}, + {file = "requests_aws4auth-1.1.1-py2.py3-none-any.whl", hash = "sha256:dfd9f930ffde48a756b72b55698a8522875ea6358dcffbcc44a66700ace31783"}, +] requests-unixsocket = [ {file = "requests-unixsocket-0.2.0.tar.gz", hash = "sha256:9e5c1a20afc3cf786197ae59c79bcdb0e7565f218f27df5f891307ee8817c1ea"}, {file = 
"requests_unixsocket-0.2.0-py2.py3-none-any.whl", hash = "sha256:014d07bfb66dc805a011a8b4b306cf4ec96d2eddb589f6b2b5765e626f0dc0cc"}, diff --git a/pyproject.toml b/pyproject.toml index ff715d6d9..ae3d1f2ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,8 @@ pyodbc = { version = "~4.0.30", optional = true } sphinx-bootstrap-theme = "^0.8.0" Sphinx = "^4.2.0" tox = "^3.24.4" +elasticsearch = "7.13.4" +requests-aws4auth = "^1.1.1" [tool.poetry.extras] From 7be50627da9dbe5bb7a52c849ae7398817c51408 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Tue, 28 Sep 2021 22:12:42 -0400 Subject: [PATCH 26/41] [skip ci] opensearch: add support for host with schema http/https --- awswrangler/opensearch/_utils.py | 8 +++++++- tests/test_opensearch.py | 6 ++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/awswrangler/opensearch/_utils.py b/awswrangler/opensearch/_utils.py index 4d05c41f6..d84c319cc 100644 --- a/awswrangler/opensearch/_utils.py +++ b/awswrangler/opensearch/_utils.py @@ -1,6 +1,7 @@ """Amazon OpenSearch Utils Module (PRIVATE).""" import logging +import re from typing import Any, Optional import boto3 @@ -25,6 +26,11 @@ def _get_version_major(client: Elasticsearch) -> Any: return None +def _strip_endpoint(endpoint: str) -> str: + uri_schema = re.compile(r"https?://") + return uri_schema.sub("", endpoint).strip().strip("/") + + def connect( host: str, port: Optional[int] = 443, @@ -87,7 +93,7 @@ def connect( http_auth = AWS4Auth(creds.access_key, creds.secret_key, region, "es", creds.token) try: es = Elasticsearch( - host=host, + host=_strip_endpoint(host), port=port, http_auth=http_auth, use_ssl=True, diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index df40e2042..0048ba937 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -149,6 +149,12 @@ def test_connection_opensearch_1_0(domain_endpoint_opensearch_1_0): assert len(client.info()) > 0 +def test_connection_opensearch_1_0_https(domain_endpoint_opensearch_1_0): + client = wr.opensearch.connect(host=f"https://{domain_endpoint_opensearch_1_0}") + print(client.info()) + assert len(client.info()) > 0 + + def test_connection_elasticsearch_7_10_fgac(domain_endpoint_elasticsearch_7_10_fgac, opensearch_password): client = wr.opensearch.connect( host=domain_endpoint_elasticsearch_7_10_fgac, fgac_user="test", fgac_password=opensearch_password From cb8656c316c4eb06ab8e82067e96654410d0d40e Mon Sep 17 00:00:00 2001 From: Muralidhar Reddy Date: Wed, 29 Sep 2021 11:42:02 +0530 Subject: [PATCH 27/41] Update 031 - OpenSearch.ipynb Fixed typo's --- tutorials/031 - OpenSearch.ipynb | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tutorials/031 - OpenSearch.ipynb b/tutorials/031 - OpenSearch.ipynb index cb5cbbbf4..f1b2b5ccb 100644 --- a/tutorials/031 - OpenSearch.ipynb +++ b/tutorials/031 - OpenSearch.ipynb @@ -67,22 +67,22 @@ "metadata": {}, "outputs": [], "source": [ - "response = wr.opensearch.create_index(\n", - " client=client,\n", - " index=\"tutorials\",\n", - " mappings={\n", - " \"properties\": {\n", - " \"id\": { \"type\" : \"integer\" },\n", - " \"id\": { \"type\" : \"string\" },\n", - " }\n", - " },\n", - " settings={\n", - " \"index\": {\n", - " \"number_of_shards\": 2\n", - " \"number_of_replicas\": 1\n", - " }\n", - " }\n", - ")\n" + "response = wr.opensearch.create_index(\n", + " client=client,\n", + " index=\"tutorials\",\n", + " mappings={\n", + " \"properties\": {\n", + " \"id\": { \"type\" : \"integer\" },\n", + " 
\"name\": { \"type\" : \"string\" }\n", + " }\n", + " },\n", + " settings={\n", + " \"index\": {\n", + " \"number_of_shards\": 2,\n", + " \"number_of_replicas\": 1\n", + " }\n", + " }\n", + " )\n" ] }, { From 22b5e9b993fcd2c4bd1d0691f45b2b8db567337e Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Thu, 30 Sep 2021 01:45:14 -0400 Subject: [PATCH 28/41] [skip ci] opensearch: index_documents 429 error --- awswrangler/opensearch/_write.py | 147 ++++++++++++++++++++++++++----- tests/test_opensearch.py | 12 +++ 2 files changed, 137 insertions(+), 22 deletions(-) diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index 3f65974ce..7386941bf 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -9,13 +9,17 @@ import boto3 import pandas as pd from elasticsearch import Elasticsearch +from elasticsearch.exceptions import NotFoundError from elasticsearch.helpers import bulk +from jsonpath_ng import parse +from jsonpath_ng.exceptions import JsonPathParserError from pandas import notna from awswrangler._utils import parse_path from awswrangler.opensearch._utils import _get_distribution, _get_version_major _logger: logging.Logger = logging.getLogger(__name__) +_logger.setLevel(logging.DEBUG) def _selected_keys(document: Mapping[str, Any], keys_to_write: Optional[List[str]]) -> Mapping[str, Any]: @@ -31,18 +35,27 @@ def _actions_generator( doc_type: Optional[str], keys_to_write: Optional[List[str]], id_keys: Optional[List[str]], -) -> Generator[Dict[str, Any], None, None]: - for document in documents: + bulk_size: int = 10000, +) -> Generator[List[Dict[str, Any]], None, None]: + bulk_chunk_documents = [] + for i, document in enumerate(documents): if id_keys: _id = "-".join([document[id_key] for id_key in id_keys]) else: _id = document.get("_id", uuid.uuid4()) - yield { - "_index": index, - "_type": doc_type, - "_id": _id, - "_source": _selected_keys(document, keys_to_write), - } + bulk_chunk_documents.append( + { + "_index": index, + "_type": doc_type, + "_id": _id, + "_source": _selected_keys(document, keys_to_write), + } + ) + if (i + 1) % bulk_size == 0: + yield bulk_chunk_documents + bulk_chunk_documents = [] + if len(bulk_chunk_documents) > 0: + yield bulk_chunk_documents def _df_doc_generator(df: pd.DataFrame) -> Generator[Dict[str, Any], None, None]: @@ -76,6 +89,51 @@ def _file_line_generator(path: str, is_json: bool = False) -> Generator[Any, Non yield line.strip() +def _get_documents_w_json_path(documents: List[Mapping[str, Any]], json_path: str) -> List[Any]: + try: + jsonpath_expression = parse(json_path) + except JsonPathParserError as e: + _logger.error("invalid json_path: %s", json_path) + raise e + output_documents = [] + for doc in documents: + for match in jsonpath_expression.find(doc): + match_value = match.value + if isinstance(match_value, list): + output_documents += match_value + elif isinstance(match_value, dict): + output_documents.append(match_value) + else: + msg = f"expected json_path value to be a list/dict. 
received type {type(match_value)} ({match_value})" + raise ValueError(msg) + return output_documents + + +def _get_refresh_interval(client: Elasticsearch, index: str) -> Any: + url = f"/{index}/_settings" + try: + response = client.transport.perform_request("GET", url) + refresh_interval = response.get(index, {}).get("index", {}).get("refresh_interval", "1s") # type: ignore + return refresh_interval + except NotFoundError: + return None + + +def _set_refresh_interval(client: Elasticsearch, index: str, refresh_interval: str) -> Any: + url = f"/{index}/_settings" + body = {"index": {"refresh_interval": refresh_interval}} + response = client.transport.perform_request("PUT", url, headers={"Content-Type": "application/json"}, body=body) + + return response + + +def _disable_refresh_interval( + client: Elasticsearch, + index: str, +) -> Any: + return _set_refresh_interval(client=client, index=index, refresh_interval="-1") + + def create_index( client: Elasticsearch, index: str, @@ -192,11 +250,13 @@ def index_json( index: str, doc_type: Optional[str] = None, boto3_session: Optional[boto3.Session] = boto3.Session(), + json_path: Optional[str] = None, **kwargs: Any, ) -> Dict[str, Any]: """Index all documents from JSON file to OpenSearch index. - The JSON file should be in a JSON-Lines text format (newline-delimited JSON) - https://jsonlines.org/. + The JSON file should be in a JSON-Lines text format (newline-delimited JSON) - https://jsonlines.org/ + OR, if it is a single large JSON document, please provide `json_path`. Parameters ---------- @@ -208,7 +268,10 @@ def index_json( Name of the index. doc_type : str, optional Name of the document type (only for Elasticsearch versions 5.x and earlier). + json_path : str, optional + JsonPath expression to specify explicit path to a single name element + in a JSON hierarchical data structure. + Read more about [JsonPath](https://jsonpath.com) boto3_session : boto3.Session(), optional Boto3 Session to be used to access s3 if s3 path is provided. The default boto3 Session will be used if boto3_session receive None. @@ -233,7 +297,7 @@ def index_json( ... index='sample-index1' ... ) """ - # Loading data from file + _logger.debug("indexing %s from %s", index, path) if boto3_session is None: raise ValueError("boto3_session cannot be None") @@ -245,8 +309,12 @@ def index_json( body = obj["Body"].read() lines = body.splitlines() documents = [json.loads(line) for line in lines] + if json_path: + documents = _get_documents_w_json_path(documents, json_path) else: # local path documents = list(_file_line_generator(path, is_json=True)) + if json_path: + documents = _get_documents_w_json_path(documents, json_path) return index_documents(client=client, documents=documents, index=index, doc_type=doc_type, **kwargs) @@ -308,6 +376,7 @@ def index_csv( ... pandas_kwargs={'sep': '|', 'na_values': ['null', 'none']} ... 
) """ + _logger.debug("indexing %s from %s", index, path) if pandas_kwargs is None: pandas_kwargs = {} enforced_pandas_params = { "skip_blank_lines": True, # 'na_filter': True # will generate Nan value for empty cells. We remove Nan keys in _df_doc_generator @@ -369,9 +438,10 @@ def index_documents( keys_to_write: Optional[List[str]] = None, id_keys: Optional[List[str]] = None, ignore_status: Optional[Union[List[Any], Tuple[Any]]] = None, + bulk_size: int = 1000, chunk_size: Optional[int] = 500, max_chunk_bytes: Optional[int] = 100 * 1024 * 1024, - max_retries: Optional[int] = 0, + max_retries: Optional[int] = 2, initial_backoff: Optional[int] = 2, max_backoff: Optional[int] = 600, **kwargs: Any, @@ -384,6 +454,10 @@ def index_documents( https://elasticsearch-py.readthedocs.io/en/v7.13.4/helpers.html#elasticsearch.helpers.bulk https://elasticsearch-py.readthedocs.io/en/v7.13.4/helpers.html#elasticsearch.helpers.streaming_bulk + If you receive `Error 429 (Too Many Requests) /_bulk`, please try to decrease the `bulk_size` value. + Please also consider modifying the cluster size and instance type - + Read more here: https://aws.amazon.com/premiumsupport/knowledge-center/resolve-429-error-es/ + Parameters ---------- client : Elasticsearch instance of elasticsearch.Elasticsearch to use. @@ -401,13 +475,15 @@ def index_documents( otherwise will generate unique identifier for each document. ignore_status: Union[List[Any], Tuple[Any]], optional list of HTTP status codes that you want to ignore (not raising an exception) + bulk_size: int, + number of docs in each _bulk request (default: 1000) chunk_size : int, optional number of docs in one chunk sent to es (default: 500) max_chunk_bytes: int, optional the maximum size of the request in bytes (default: 100MB) max_retries : int, optional maximum number of times a document will be retried when - ``429`` is received, set to 0 (default) for no retries on ``429`` (default: 0) + ``429`` is received, set to 0 for no retries on ``429`` (default: 2) initial_backoff : int, optional number of seconds we should wait before the first retry. Any subsequent retries will be powers of ``initial_backoff*2**retry_number`` (default: 2)
) """ - success, errors = bulk( - client=client, - actions=_actions_generator(documents, index, doc_type, keys_to_write=keys_to_write, id_keys=id_keys), - ignore_status=ignore_status, - chunk_size=chunk_size, - max_chunk_bytes=max_chunk_bytes, - max_retries=max_retries, - initial_backoff=initial_backoff, - max_backoff=max_backoff, - **kwargs, + if not isinstance(documents, list): + documents = list(documents) + total_documents = len(documents) + _logger.debug("indexing %s documents into %s", total_documents, index) + + actions = _actions_generator( + documents, index, doc_type, keys_to_write=keys_to_write, id_keys=id_keys, bulk_size=bulk_size ) + + success = 0 + errors: List[Any] = [] + refresh_interval = None + try: + if total_documents > bulk_size: + refresh_interval = _get_refresh_interval(client, index) + if refresh_interval: + _disable_refresh_interval(client, index) + for bulk_chunk_documents in actions: + _logger.debug("running bulk index of %s documents", len(bulk_chunk_documents)) + _success, _errors = bulk( + client=client, + actions=bulk_chunk_documents, + ignore_status=ignore_status, + chunk_size=chunk_size, + max_chunk_bytes=max_chunk_bytes, + max_retries=max_retries, + initial_backoff=initial_backoff, + max_backoff=max_backoff, + request_timeout=30, + **kwargs, + ) + success += _success + errors += _errors # type: ignore + _logger.debug("indexed %s documents (%s/%s)", _success, success, total_documents) + finally: + if refresh_interval: + _set_refresh_interval(client, index, refresh_interval) + return {"success": success, "errors": errors} diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index 0048ba937..9c97c6d0f 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -210,6 +210,7 @@ def test_index_df(client): index="test_index_df1", ) print(response) + assert response.get("success", 0) == 3 def test_index_documents(client): @@ -219,6 +220,7 @@ def test_index_documents(client): index="test_index_documents1", ) print(response) + assert response.get("success", 0) == 3 def test_index_documents_id_keys(client): @@ -342,3 +344,13 @@ def test_index_csv_s3(client, path): response = wr.opensearch.index_csv(client, path=path, index=index) print(response) assert response.get("success", 0) == 6 + + +@pytest.mark.skip(reason="takes a long time (~5 mins) since testing against small clusters") +def test_index_json_s3_large_file(client): + path = "s3://irs-form-990/index_2011.json" + response = wr.opensearch.index_json( + client, index="test_index_json_s3_large_file", path=path, json_path="Filings2011", id_keys=["EIN"], bulk_size=20 + ) + print(response) + assert response.get("success", 0) > 0 From c5092a2b8202a2f8abdb830ec9e9582109bf2639 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Thu, 30 Sep 2021 01:46:12 -0400 Subject: [PATCH 29/41] [skip ci] opensearch: add jsonpath_ng library --- poetry.lock | 34 ++++++++++++++++++++++++++++++++-- pyproject.toml | 1 + 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index c20cccf14..864bdd64f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -391,7 +391,7 @@ python-versions = ">=3.6, <3.7" name = "decorator" version = "5.0.9" description = "Decorators for Humans" -category = "dev" +category = "main" optional = false python-versions = ">=3.5" @@ -725,6 +725,19 @@ python-versions = "*" [package.extras] dev = ["hypothesis"] +[[package]] +name = "jsonpath-ng" +version = "1.5.3" +description = "A final implementation of JSONPath for Python that aims to be standard compliant, 
including arithmetic and binary comparison operators and providing clear AST for metaprogramming." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +decorator = "*" +ply = "*" +six = "*" + [[package]] name = "jsonschema" version = "3.2.0" @@ -1287,6 +1300,14 @@ importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} [package.extras] dev = ["pre-commit", "tox"] +[[package]] +name = "ply" +version = "3.11" +description = "Python Lex & Yacc" +category = "main" +optional = false +python-versions = "*" + [[package]] name = "prometheus-client" version = "0.11.0" @@ -2143,7 +2164,7 @@ sqlserver = ["pyodbc"] [metadata] lock-version = "1.1" python-versions = ">=3.6.2, <3.10" -content-hash = "b273034ed005e309039619d35491a4c4f615b49334487db5aa982eada794901a" +content-hash = "e01439ee1c27186731e13aa463473f281cf349c44dd7c9dc3b6112a6c49c533c" [metadata.files] aiobotocore = [ @@ -2565,6 +2586,11 @@ json5 = [ {file = "json5-0.9.6-py2.py3-none-any.whl", hash = "sha256:823e510eb355949bed817e1f3e2d682455dc6af9daf6066d5698d6a2ca4481c2"}, {file = "json5-0.9.6.tar.gz", hash = "sha256:9175ad1bc248e22bb8d95a8e8d765958bf0008fef2fe8abab5bc04e0f1ac8302"}, ] +jsonpath-ng = [ + {file = "jsonpath-ng-1.5.3.tar.gz", hash = "sha256:a273b182a82c1256daab86a313b937059261b5c5f8c4fa3fc38b882b344dd567"}, + {file = "jsonpath_ng-1.5.3-py2-none-any.whl", hash = "sha256:f75b95dbecb8a0f3b86fd2ead21c2b022c3f5770957492b9b6196ecccfeb10aa"}, + {file = "jsonpath_ng-1.5.3-py3-none-any.whl", hash = "sha256:292a93569d74029ba75ac2dc3d3630fc0e17b2df26119a165fa1d498ca47bf65"}, +] jsonschema = [ {file = "jsonschema-3.2.0-py2.py3-none-any.whl", hash = "sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163"}, {file = "jsonschema-3.2.0.tar.gz", hash = "sha256:c8a85b28d377cc7737e46e2d9f2b4f44ee3c0e1deac6bf46ddefc7187d30797a"}, @@ -2963,6 +2989,10 @@ pluggy = [ {file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"}, {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"}, ] +ply = [ + {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, + {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, +] prometheus-client = [ {file = "prometheus_client-0.11.0-py2.py3-none-any.whl", hash = "sha256:b014bc76815eb1399da8ce5fc84b7717a3e63652b0c0f8804092c9363acab1b2"}, {file = "prometheus_client-0.11.0.tar.gz", hash = "sha256:3a8baade6cb80bcfe43297e33e7623f3118d660d41387593758e2fb1ea173a86"}, diff --git a/pyproject.toml b/pyproject.toml index ae3d1f2ee..c488464e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ Sphinx = "^4.2.0" tox = "^3.24.4" elasticsearch = "7.13.4" requests-aws4auth = "^1.1.1" +jsonpath-ng = "^1.5.3" [tool.poetry.extras] From 97a35bde0da80ff9722222fe95ed504e4acf55b3 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Thu, 30 Sep 2021 02:22:29 -0400 Subject: [PATCH 30/41] [skip ci] opensearch: renamed fgac user/password --- awswrangler/opensearch/_utils.py | 14 +++++++------- tests/test_opensearch.py | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/awswrangler/opensearch/_utils.py b/awswrangler/opensearch/_utils.py index d84c319cc..f38b71181 100644 --- a/awswrangler/opensearch/_utils.py +++ b/awswrangler/opensearch/_utils.py @@ -36,8 +36,8 @@ def connect( port: Optional[int] = 
443, boto3_session: Optional[boto3.Session] = boto3.Session(), region: Optional[str] = None, - fgac_user: Optional[str] = None, - fgac_password: Optional[str] = None, + user: Optional[str] = None, + password: Optional[str] = None, ) -> Elasticsearch: """Create a secure connection to the specified Amazon OpenSearch domain. @@ -65,9 +65,9 @@ def connect( Boto3 Session. The default boto3 Session will be used if boto3_session receive None. region : AWS region of the Amazon OS domain. If not provided will be extracted from boto3_session. - fgac_user : + user : Fine-grained access control user. Mandatory if OS Cluster uses Fine Grained Access Control. - fgac_password : + password : Fine-grained access control password. Mandatory if OS Cluster uses Fine Grained Access Control. Returns @@ -81,11 +81,11 @@ def connect( if port not in valid_ports: raise ValueError("results: port must be one of %r." % valid_ports) - if fgac_user and fgac_password: - http_auth = (fgac_user, fgac_password) + if user and password: + http_auth = (user, password) else: if boto3_session is None: - raise ValueError("Please provide either boto3_session or fgac_user+fgac_password") + raise ValueError("Please provide either boto3_session or FGAC user+password") # else: if region is None: region = boto3_session.region_name diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index 9c97c6d0f..a62d5ed2e 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -157,7 +157,7 @@ def test_connection_opensearch_1_0_https(domain_endpoint_opensearch_1_0): def test_connection_elasticsearch_7_10_fgac(domain_endpoint_elasticsearch_7_10_fgac, opensearch_password): client = wr.opensearch.connect( - host=domain_endpoint_elasticsearch_7_10_fgac, fgac_user="test", fgac_password=opensearch_password + host=domain_endpoint_elasticsearch_7_10_fgac, user="test", password=opensearch_password ) print(client.info()) assert len(client.info()) > 0 @@ -172,7 +172,7 @@ def opensearch_1_0_client(domain_endpoint_opensearch_1_0): @pytest.fixture(scope="session") def elasticsearch_7_10_fgac_client(domain_endpoint_elasticsearch_7_10_fgac, opensearch_password): client = wr.opensearch.connect( - host=domain_endpoint_elasticsearch_7_10_fgac, fgac_user="test", fgac_password=opensearch_password + host=domain_endpoint_elasticsearch_7_10_fgac, user="test", password=opensearch_password ) return client From a73d8759fdf225724a599fc5c6fc1fcd34871dfa Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Thu, 30 Sep 2021 13:20:10 -0400 Subject: [PATCH 31/41] [skip ci] opensearch: add connection timeout --- awswrangler/opensearch/_utils.py | 14 +++++++++----- tests/test_opensearch.py | 4 ++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/awswrangler/opensearch/_utils.py b/awswrangler/opensearch/_utils.py index f38b71181..e3afee730 100644 --- a/awswrangler/opensearch/_utils.py +++ b/awswrangler/opensearch/_utils.py @@ -36,7 +36,7 @@ def connect( port: Optional[int] = 443, boto3_session: Optional[boto3.Session] = boto3.Session(), region: Optional[str] = None, - user: Optional[str] = None, + username: Optional[str] = None, password: Optional[str] = None, ) -> Elasticsearch: """Create a secure connection to the specified Amazon OpenSearch domain. 
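
As a quick orientation before the remaining hunks, here is a sketch of the two authentication paths `connect` supports at this point in the series; the endpoint and credentials below are placeholders, assuming a reachable domain.

```python
import awswrangler as wr

# 1) Domain with fine-grained access control: HTTP basic auth.
client = wr.opensearch.connect(
    host="my-domain.us-east-1.es.amazonaws.com",
    username="test",      # placeholder FGAC master user
    password="********",  # placeholder
)

# 2) IAM-based access: omit username/password and the boto3 session's
#    credentials are used to SigV4-sign requests via AWS4Auth (service "es").
client = wr.opensearch.connect(host="my-domain.us-east-1.es.amazonaws.com")
```
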
@@ -46,7 +46,8 @@ def connect( We use [elasticsearch-py](https://elasticsearch-py.readthedocs.io/en/v7.13.4/), an Elasticsearch client for Python, version 7.13.4, which is the recommended version for best compatibility Amazon OpenSearch, since later versions may reject connections to Amazon OpenSearch clusters. - In the future will move to a new open source client under the [OpenSearch project](https://www.opensearch.org/) + In the future we will use [opensearch-py](https://github.com/opensearch-project/opensearch-py) \ +(currently in the works). You can read more here: https://aws.amazon.com/blogs/opensource/keeping-clients-of-opensearch-and-elasticsearch-compatible-with-open-source/ https://opensearch.org/docs/clients/index/ @@ -65,7 +66,7 @@ def connect( Boto3 Session. The default boto3 Session will be used if boto3_session receive None. region : AWS region of the Amazon OS domain. If not provided will be extracted from boto3_session. - user : + username : Fine-grained access control user. Mandatory if OS Cluster uses Fine Grained Access Control. password : Fine-grained access control password. Mandatory if OS Cluster uses Fine Grained Access Control. @@ -81,8 +82,8 @@ def connect( if port not in valid_ports: raise ValueError("results: port must be one of %r." % valid_ports) - if user and password: - http_auth = (user, password) + if username and password: + http_auth = (username, password) else: if boto3_session is None: raise ValueError("Please provide either boto3_session or FGAC user+password") @@ -99,6 +100,9 @@ def connect( use_ssl=True, verify_certs=True, connection_class=RequestsHttpConnection, + timeout=30, + max_retries=10, + retry_on_timeout=True, ) except Exception as e: _logger.error("Error connecting to Opensearch cluster. Please verify authentication details") diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index a62d5ed2e..bf7c8fa39 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -157,7 +157,7 @@ def test_connection_opensearch_1_0_https(domain_endpoint_opensearch_1_0): def test_connection_elasticsearch_7_10_fgac(domain_endpoint_elasticsearch_7_10_fgac, opensearch_password): client = wr.opensearch.connect( - host=domain_endpoint_elasticsearch_7_10_fgac, user="test", password=opensearch_password + host=domain_endpoint_elasticsearch_7_10_fgac, username="test", password=opensearch_password ) print(client.info()) assert len(client.info()) > 0 @@ -172,7 +172,7 @@ def opensearch_1_0_client(domain_endpoint_opensearch_1_0): @pytest.fixture(scope="session") def elasticsearch_7_10_fgac_client(domain_endpoint_elasticsearch_7_10_fgac, opensearch_password): client = wr.opensearch.connect( - host=domain_endpoint_elasticsearch_7_10_fgac, user="test", password=opensearch_password + host=domain_endpoint_elasticsearch_7_10_fgac, username="test", password=opensearch_password ) return client From ed7a57c638639e7d60ff550b4ba06cebc5d27e1e Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Thu, 30 Sep 2021 20:34:22 -0400 Subject: [PATCH 32/41] opensearch: get_credentials_from_session --- awswrangler/opensearch/_utils.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/awswrangler/opensearch/_utils.py b/awswrangler/opensearch/_utils.py index e3afee730..92efc10ea 100644 --- a/awswrangler/opensearch/_utils.py +++ b/awswrangler/opensearch/_utils.py @@ -8,6 +8,8 @@ from elasticsearch import Elasticsearch, RequestsHttpConnection from requests_aws4auth import AWS4Auth +from awswrangler import _utils, exceptions + _logger: 
logging.Logger = logging.getLogger(__name__) @@ -67,7 +69,7 @@ def connect( region : AWS region of the Amazon OS domain. If not provided will be extracted from boto3_session. username : - Fine-grained access control user. Mandatory if OS Cluster uses Fine Grained Access Control. + Fine-grained access control username. Mandatory if OS Cluster uses Fine Grained Access Control. password : Fine-grained access control password. Mandatory if OS Cluster uses Fine Grained Access Control. @@ -85,13 +87,16 @@ def connect( if username and password: http_auth = (username, password) else: - if boto3_session is None: - raise ValueError("Please provide either boto3_session or FGAC user+password") - # else: if region is None: - region = boto3_session.region_name - creds = boto3_session.get_credentials() - http_auth = AWS4Auth(creds.access_key, creds.secret_key, region, "es", creds.token) + region = _utils.get_region_from_session(boto3_session=boto3_session) + creds = _utils.get_credentials_from_session(boto3_session=boto3_session) + if creds.access_key is None or creds.secret_key is None: + raise exceptions.InvalidArgument( + "One of IAM Role or AWS ACCESS_KEY_ID and SECRET_ACCESS_KEY must be " + "given. Unable to find ACCESS_KEY_ID and SECRET_ACCESS_KEY in boto3 " + "session." + ) + http_auth = AWS4Auth(creds.access_key, creds.secret_key, region, "es", session_token=creds.token) try: es = Elasticsearch( host=_strip_endpoint(host), From 545e16319e04ade43a7e264bcc50c105041abd14 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Fri, 1 Oct 2021 00:58:39 -0400 Subject: [PATCH 33/41] [skip ci] opensearch: indexing progressbar --- awswrangler/opensearch/_write.py | 39 ++++++++++++++++++++++++-------- poetry.lock | 37 +++++++++++++++++++++++++++++- pyproject.toml | 1 + tests/test_opensearch.py | 2 ++ 4 files changed, 68 insertions(+), 11 deletions(-) diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index 7386941bf..1478983f9 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -8,7 +8,8 @@ import boto3 import pandas as pd -from elasticsearch import Elasticsearch +import progressbar +from elasticsearch import Elasticsearch, TransportError from elasticsearch.exceptions import NotFoundError from elasticsearch.helpers import bulk from jsonpath_ng import parse @@ -21,6 +22,8 @@ _logger: logging.Logger = logging.getLogger(__name__) _logger.setLevel(logging.DEBUG) +_DEFAULT_REFRESH_INTERVAL = "1s" + def _selected_keys(document: Mapping[str, Any], keys_to_write: Optional[List[str]]) -> Mapping[str, Any]: if keys_to_write is None: @@ -40,7 +43,7 @@ def _actions_generator( bulk_chunk_documents = [] for i, document in enumerate(documents): if id_keys: - _id = "-".join([document[id_key] for id_key in id_keys]) + _id = "-".join([str(document[id_key]) for id_key in id_keys]) else: _id = document.get("_id", uuid.uuid4()) bulk_chunk_documents.append( @@ -113,13 +116,14 @@ def _get_refresh_interval(client: Elasticsearch, index: str) -> Any: url = f"/{index}/_settings" try: response = client.transport.perform_request("GET", url) - refresh_interval = response.get(index, {}).get("index", {}).get("refresh_interval", "1s") # type: ignore + index_settings = response.get(index, {}).get("index", {}) # type: ignore + refresh_interval = index_settings.get("refresh_interval", _DEFAULT_REFRESH_INTERVAL) return refresh_interval except NotFoundError: return None -def _set_refresh_interval(client: Elasticsearch, index: str, refresh_interval: str) -> Any: +def 
_set_refresh_interval(client: Elasticsearch, index: str, refresh_interval: Optional[Any]) -> Any:
     url = f"/{index}/_settings"
     body = {"index": {"refresh_interval": refresh_interval}}
     response = client.transport.perform_request("PUT", url, headers={"Content-Type": "application/json"}, body=body)
@@ -526,11 +530,17 @@ def index_documents(
     errors: List[Any] = []
     refresh_interval = None
     try:
-        if total_documents > bulk_size:
-            refresh_interval = _get_refresh_interval(client, index)
-            if refresh_interval:
+        widgets = [
+            progressbar.Percentage(),
+            progressbar.SimpleProgress(format=" (%(value_s)s/%(max_value_s)s)"),
+            progressbar.Bar(),
+            progressbar.Timer(),
+        ]
+        progress_bar = progressbar.ProgressBar(widgets=widgets, max_value=total_documents, prefix="Indexing: ").start()
+        for i, bulk_chunk_documents in enumerate(actions):
+            if i == 1:  # second bulk iteration, in case the index didn't exist before
+                refresh_interval = _get_refresh_interval(client, index)
                 _disable_refresh_interval(client, index)
-        for bulk_chunk_documents in actions:
             _logger.debug("running bulk index of %s documents", len(bulk_chunk_documents))
             _success, _errors = bulk(
                 client=client,
@@ -547,8 +557,17 @@
             success += _success
             errors += _errors  # type: ignore
             _logger.debug("indexed %s documents (%s/%s)", _success, success, total_documents)
+            progress_bar.update(success, force=True)
+    except TransportError as e:
+        if str(e.status_code) == "429":  # Too Many Requests
+            _logger.error(
+                "Error 429 (Too Many Requests): "
+                "Try to tune the bulk_size parameter. "
+                "Read more here: https://aws.amazon.com/premiumsupport/knowledge-center/resolve-429-error-es"
+            )
+            raise e
+
     finally:
-        if refresh_interval:
-            _set_refresh_interval(client, index, refresh_interval)
+        _set_refresh_interval(client, index, refresh_interval)
 
     return {"success": success, "errors": errors}
 
diff --git a/poetry.lock b/poetry.lock
index 864bdd64f..58c0ff6d8 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1308,6 +1308,22 @@ category = "main"
 optional = false
 python-versions = "*"
 
+[[package]]
+name = "progressbar2"
+version = "3.53.3"
+description = "A Python Progressbar library to provide visual (yet text based) progress to long running operations."
+category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +python-utils = ">=2.3.0" +six = "*" + +[package.extras] +docs = ["sphinx (>=1.7.4)"] +tests = ["flake8 (>=3.7.7)", "pytest (>=4.6.9)", "pytest-cov (>=2.6.1)", "freezegun (>=0.3.11)", "sphinx (>=1.8.5)"] + [[package]] name = "prometheus-client" version = "0.11.0" @@ -1573,6 +1589,17 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "python-utils" +version = "2.5.6" +description = "Python Utils is a module with some convenient utilities not included with the standard Python install" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = "*" + [[package]] name = "pytz" version = "2021.1" @@ -2164,7 +2191,7 @@ sqlserver = ["pyodbc"] [metadata] lock-version = "1.1" python-versions = ">=3.6.2, <3.10" -content-hash = "e01439ee1c27186731e13aa463473f281cf349c44dd7c9dc3b6112a6c49c533c" +content-hash = "dc43ca4a72073bdd82a0c36ec5b1b60eb68ae95055e885b190dc35b36a89137f" [metadata.files] aiobotocore = [ @@ -2993,6 +3020,10 @@ ply = [ {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, ] +progressbar2 = [ + {file = "progressbar2-3.53.3-py2.py3-none-any.whl", hash = "sha256:6610fe393a4591967ecf9062d42c0663c8862092245c490e5971ec5f348755ca"}, + {file = "progressbar2-3.53.3.tar.gz", hash = "sha256:f4e1c2d48e608850c59f793d6e74ccdebbcbaac7ffe917d45e9646ec0d664d6d"}, +] prometheus-client = [ {file = "prometheus_client-0.11.0-py2.py3-none-any.whl", hash = "sha256:b014bc76815eb1399da8ce5fc84b7717a3e63652b0c0f8804092c9363acab1b2"}, {file = "prometheus_client-0.11.0.tar.gz", hash = "sha256:3a8baade6cb80bcfe43297e33e7623f3118d660d41387593758e2fb1ea173a86"}, @@ -3144,6 +3175,10 @@ python-dateutil = [ python-levenshtein = [ {file = "python-Levenshtein-0.12.2.tar.gz", hash = "sha256:dc2395fbd148a1ab31090dd113c366695934b9e85fe5a4b2a032745efd0346f6"}, ] +python-utils = [ + {file = "python-utils-2.5.6.tar.gz", hash = "sha256:352d5b1febeebf9b3cdb9f3c87a3b26ef22d3c9e274a8ec1e7048ecd2fac4349"}, + {file = "python_utils-2.5.6-py2.py3-none-any.whl", hash = "sha256:18fbc1a1df9a9061e3059a48ebe5c8a66b654d688b0e3ecca8b339a7f168f208"}, +] pytz = [ {file = "pytz-2021.1-py2.py3-none-any.whl", hash = "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798"}, {file = "pytz-2021.1.tar.gz", hash = "sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da"}, diff --git a/pyproject.toml b/pyproject.toml index c488464e2..0071a8533 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ tox = "^3.24.4" elasticsearch = "7.13.4" requests-aws4auth = "^1.1.1" jsonpath-ng = "^1.5.3" +progressbar2 = "^3.53.3" [tool.poetry.extras] diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index bf7c8fa39..345d248e3 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -1,6 +1,7 @@ import json import logging import tempfile +import time import boto3 import pandas as pd @@ -186,6 +187,7 @@ def client(request): def test_create_index(client): index = "test_create_index" wr.opensearch.delete_index(client, index) + time.sleep(0.5) # let the cluster clean up response = wr.opensearch.create_index( client=client, index=index, From 6042ae4b4ce3df4ec9d05b4e93ccc3cedee7c65a Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Sun, 3 Oct 2021 02:22:20 -0400 Subject: 
[PATCH 34/41] [skip ci] opensearch.index_documents.max_retries default 5 --- awswrangler/opensearch/_write.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index 1478983f9..b54c7d7e6 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -445,7 +445,7 @@ def index_documents( bulk_size: int = 1000, chunk_size: Optional[int] = 500, max_chunk_bytes: Optional[int] = 100 * 1024 * 1024, - max_retries: Optional[int] = 2, + max_retries: Optional[int] = 5, initial_backoff: Optional[int] = 2, max_backoff: Optional[int] = 600, **kwargs: Any, From c53cd6fe095ab31c568b79ef9ef43c3b1a335e55 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Mon, 4 Oct 2021 16:02:05 -0400 Subject: [PATCH 35/41] opensearch: replace elasticsearch-py with opensearch-py low-level client --- awswrangler/opensearch/_read.py | 22 ++++++------ awswrangler/opensearch/_utils.py | 27 ++++++-------- awswrangler/opensearch/_write.py | 62 ++++++++++++++++---------------- poetry.lock | 45 ++++++++++++----------- pyproject.toml | 2 +- 5 files changed, 75 insertions(+), 83 deletions(-) diff --git a/awswrangler/opensearch/_read.py b/awswrangler/opensearch/_read.py index 5afe44d80..015e47afa 100644 --- a/awswrangler/opensearch/_read.py +++ b/awswrangler/opensearch/_read.py @@ -3,8 +3,8 @@ from typing import Any, Dict, List, Mapping, Optional, Union import pandas as pd -from elasticsearch import Elasticsearch -from elasticsearch.helpers import scan +from opensearchpy import OpenSearch +from opensearchpy.helpers import scan from awswrangler.opensearch._utils import _get_distribution @@ -41,7 +41,7 @@ def _search_response_to_df(response: Union[Mapping[str, Any], Any]) -> pd.DataFr def search( - client: Elasticsearch, + client: OpenSearch, index: Optional[str] = "_all", search_body: Optional[Dict[str, Any]] = None, doc_type: Optional[str] = None, @@ -52,8 +52,8 @@ def search( Parameters ---------- - client : Elasticsearch - instance of elasticsearch.Elasticsearch to use. + client : OpenSearch + instance of opensearchpy.OpenSearch to use. index : str, optional A comma-separated list of index names to search. use `_all` or empty string to perform the operation on all indices. @@ -68,9 +68,9 @@ def search( Because scroll search contexts consume a lot of memory, we suggest you don’t use the scroll operation for frequent user queries. **kwargs : - KEYWORD arguments forwarded to [elasticsearch.Elasticsearch.search]\ -(https://elasticsearch-py.readthedocs.io/en/v7.13.4/api.html#elasticsearch.Elasticsearch.search) - and also to [elasticsearch.helpers.scan](https://elasticsearch-py.readthedocs.io/en/master/helpers.html#scan) + KEYWORD arguments forwarded to [opensearchpy.OpenSearch.search]\ +(https://opensearch-py.readthedocs.io/en/latest/api.html#opensearchpy.OpenSearch.search) + and also to [opensearchpy.helpers.scan](https://opensearch-py.readthedocs.io/en/master/helpers.html#scan) if `is_scroll=True` Returns @@ -111,13 +111,13 @@ def search( return df -def search_by_sql(client: Elasticsearch, sql_query: str, **kwargs: Any) -> pd.DataFrame: +def search_by_sql(client: OpenSearch, sql_query: str, **kwargs: Any) -> pd.DataFrame: """Return results matching [SQL query](https://opensearch.org/docs/search-plugins/sql/index/) as pandas dataframe. Parameters ---------- - client : Elasticsearch - instance of elasticsearch.Elasticsearch to use. + client : OpenSearch + instance of opensearchpy.OpenSearch to use. 
sql_query : str SQL query **kwargs : diff --git a/awswrangler/opensearch/_utils.py b/awswrangler/opensearch/_utils.py index 92efc10ea..b2a139cbf 100644 --- a/awswrangler/opensearch/_utils.py +++ b/awswrangler/opensearch/_utils.py @@ -5,7 +5,7 @@ from typing import Any, Optional import boto3 -from elasticsearch import Elasticsearch, RequestsHttpConnection +from opensearchpy import OpenSearch, RequestsHttpConnection from requests_aws4auth import AWS4Auth from awswrangler import _utils, exceptions @@ -13,15 +13,15 @@ _logger: logging.Logger = logging.getLogger(__name__) -def _get_distribution(client: Elasticsearch) -> Any: +def _get_distribution(client: OpenSearch) -> Any: return client.info().get("version", {}).get("distribution", "elasticsearch") -def _get_version(client: Elasticsearch) -> Any: +def _get_version(client: OpenSearch) -> Any: return client.info().get("version", {}).get("number") -def _get_version_major(client: Elasticsearch) -> Any: +def _get_version_major(client: OpenSearch) -> Any: version = _get_version(client) if version: return int(version.split(".")[0]) @@ -40,19 +40,12 @@ def connect( region: Optional[str] = None, username: Optional[str] = None, password: Optional[str] = None, -) -> Elasticsearch: +) -> OpenSearch: """Create a secure connection to the specified Amazon OpenSearch domain. Note ---- - We use [elasticsearch-py](https://elasticsearch-py.readthedocs.io/en/v7.13.4/), an Elasticsearch client for Python, - version 7.13.4, which is the recommended version for best compatibility Amazon OpenSearch, - since later versions may reject connections to Amazon OpenSearch clusters. - In the future we will use [opensearch-py](https://github.com/opensearch-project/opensearch-py) \ -(currently in the works). - You can read more here: - https://aws.amazon.com/blogs/opensource/keeping-clients-of-opensearch-and-elasticsearch-compatible-with-open-source/ - https://opensearch.org/docs/clients/index/ + We use [opensearch-py](https://github.com/opensearch-project/opensearch-py), an OpenSearch low-level python client. The username and password are mandatory if the OS Cluster uses [Fine Grained Access Control]\ (https://docs.aws.amazon.com/opensearch-service/latest/developerguide/fgac.html). @@ -75,9 +68,9 @@ def connect( Returns ------- - elasticsearch.Elasticsearch - Elasticsearch low-level client. - https://elasticsearch-py.readthedocs.io/en/v7.13.4/api.html#elasticsearch + opensearchpy.OpenSearch + OpenSearch low-level client. 
+        https://github.com/opensearch-project/opensearch-py/blob/main/opensearchpy/client/__init__.py
     """
     valid_ports = {80, 443}
 
@@ -98,7 +91,7 @@ def connect(
             )
     http_auth = AWS4Auth(creds.access_key, creds.secret_key, region, "es", session_token=creds.token)
     try:
-        es = Elasticsearch(
+        es = OpenSearch(
             host=_strip_endpoint(host),
             port=port,
             http_auth=http_auth,
diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py
index b54c7d7e6..5bb081909 100644
--- a/awswrangler/opensearch/_write.py
+++ b/awswrangler/opensearch/_write.py
@@ -9,11 +9,11 @@
 import boto3
 import pandas as pd
 import progressbar
-from elasticsearch import Elasticsearch, TransportError
-from elasticsearch.exceptions import NotFoundError
-from elasticsearch.helpers import bulk
 from jsonpath_ng import parse
 from jsonpath_ng.exceptions import JsonPathParserError
+from opensearchpy import OpenSearch, TransportError
+from opensearchpy.exceptions import NotFoundError
+from opensearchpy.helpers import bulk
 from pandas import notna
 
 from awswrangler._utils import parse_path
@@ -112,7 +112,7 @@ def _get_documents_w_json_path(documents: List[Mapping[str, Any]], json_path: str) -> List[Mapping[str, Any]]:
     return output_documents
 
 
-def _get_refresh_interval(client: Elasticsearch, index: str) -> Any:
+def _get_refresh_interval(client: OpenSearch, index: str) -> Any:
     url = f"/{index}/_settings"
     try:
         response = client.transport.perform_request("GET", url)
@@ -123,7 +123,7 @@
 
 
-def _set_refresh_interval(client: Elasticsearch, index: str, refresh_interval: Optional[Any]) -> Any:
+def _set_refresh_interval(client: OpenSearch, index: str, refresh_interval: Optional[Any]) -> Any:
     url = f"/{index}/_settings"
     body = {"index": {"refresh_interval": refresh_interval}}
     response = client.transport.perform_request("PUT", url, headers={"Content-Type": "application/json"}, body=body)
@@ -132,14 +132,14 @@
 
 
 def _disable_refresh_interval(
-    client: Elasticsearch,
+    client: OpenSearch,
     index: str,
 ) -> Any:
     return _set_refresh_interval(client=client, index=index, refresh_interval="-1")
 
 
 def create_index(
-    client: Elasticsearch,
+    client: OpenSearch,
     index: str,
     doc_type: Optional[str] = None,
     settings: Optional[Dict[str, Any]] = None,
@@ -149,8 +149,8 @@
 
     Parameters
     ----------
-    client : Elasticsearch
-        instance of elasticsearch.Elasticsearch to use.
+    client : OpenSearch
+        instance of opensearchpy.OpenSearch to use.
     index : str
         Name of the index.
     doc_type : str, optional
@@ -214,13 +214,13 @@
     return response
 
 
-def delete_index(client: Elasticsearch, index: str) -> Dict[str, Any]:
+def delete_index(client: OpenSearch, index: str) -> Dict[str, Any]:
     """Delete an index.
 
     Parameters
     ----------
-    client : Elasticsearch
-        instance of elasticsearch.Elasticsearch to use.
+    client : OpenSearch
+        instance of opensearchpy.OpenSearch to use.
     index : str
         Name of the index.
 
@@ -249,7 +249,7 @@
 
 
 def index_json(
-    client: Elasticsearch,
+    client: OpenSearch,
     path: str,
     index: str,
     doc_type: Optional[str] = None,
@@ -264,14 +264,14 @@
 
     Parameters
     ----------
-    client : Elasticsearch
-        instance of elasticsearch.Elasticsearch to use.
+    client : OpenSearch
+        instance of opensearchpy.OpenSearch to use.
     path : str
         s3 or local path to the JSON file which contains the documents.
     index : str
         Name of the index.
doc_type : str, optional
-        Name of the document type (only for Elasticsearch versions 5.x and earlier).
+        Name of the document type (for Elasticsearch versions 5.x and earlier).
     json_path : str, optional
         JsonPath expression to specify explicit path to a single name element
         in a JSON hierarchical data structure.
@@ -323,7 +323,7 @@
 
 
 def index_csv(
-    client: Elasticsearch,
+    client: OpenSearch,
     path: str,
     index: str,
     doc_type: Optional[str] = None,
@@ -334,14 +334,14 @@
 
     Parameters
     ----------
-    client : Elasticsearch
-        instance of elasticsearch.Elasticsearch to use.
+    client : OpenSearch
+        instance of opensearchpy.OpenSearch to use.
     path : str
         s3 or local path to the CSV file which contains the documents.
     index : str
         Name of the index.
     doc_type : str, optional
-        Name of the document type (only for Elasticsearch versions 5.x and older).
+        Name of the document type (for Elasticsearch versions 5.x and earlier).
     pandas_kwargs : Dict[str, Any], optional
         Dictionary of arguments forwarded to pandas.read_csv().
         e.g. pandas_kwargs={'sep': '|', 'na_values': ['null', 'none']}
@@ -394,20 +394,20 @@
 
 
 def index_df(
-    client: Elasticsearch, df: pd.DataFrame, index: str, doc_type: Optional[str] = None, **kwargs: Any
+    client: OpenSearch, df: pd.DataFrame, index: str, doc_type: Optional[str] = None, **kwargs: Any
 ) -> Dict[str, Any]:
     """Index all documents from a DataFrame to OpenSearch index.
 
     Parameters
     ----------
-    client : Elasticsearch
-        instance of elasticsearch.Elasticsearch to use.
+    client : OpenSearch
+        instance of opensearchpy.OpenSearch to use.
     df : pd.DataFrame
         Pandas DataFrame https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
     index : str
         Name of the index.
     doc_type : str, optional
-        Name of the document type (only for Elasticsearch versions 5.x and older).
+        Name of the document type (for Elasticsearch versions 5.x and earlier).
     **kwargs :
         KEYWORD arguments forwarded to :func:`~awswrangler.opensearch.index_documents`
         which is used to execute the operation
@@ -435,7 +435,7 @@
 
 
 def index_documents(
-    client: Elasticsearch,
+    client: OpenSearch,
     documents: Iterable[Mapping[str, Any]],
     index: str,
     doc_type: Optional[str] = None,
@@ -454,9 +454,9 @@
 
     Note
     ----
-    Some of the args are referenced from elasticsearch-py client library (bulk helpers)
-    https://elasticsearch-py.readthedocs.io/en/v7.13.4/helpers.html#elasticsearch.helpers.bulk
-    https://elasticsearch-py.readthedocs.io/en/v7.13.4/helpers.html#elasticsearch.helpers.streaming_bulk
+    Some of the args are referenced from opensearch-py client library (bulk helpers)
+    https://opensearch-py.readthedocs.io/en/latest/helpers.html#opensearchpy.helpers.bulk
+    https://opensearch-py.readthedocs.io/en/latest/helpers.html#opensearchpy.helpers.streaming_bulk
 
     If you receive `Error 429 (Too Many Requests) /_bulk` please try to decrease the `bulk_size` value.
     Please also consider modifying the cluster size and instance type -
@@ -464,14 +464,14 @@
 
     Parameters
     ----------
-    client : Elasticsearch
-        instance of elasticsearch.Elasticsearch to use.
+    client : OpenSearch
+        instance of opensearchpy.OpenSearch to use.
     documents : Iterable[Mapping[str, Any]]
        List which contains the documents that will be inserted.
     index : str
        Name of the index.
     doc_type : str, optional
-        Name of the document type (only for Elasticsearch versions 5.x and older).
+        Name of the document type (for Elasticsearch versions 5.x and earlier).
keys_to_write : List[str], optional list of keys to index. If not provided all keys will be indexed id_keys : List[str], optional diff --git a/poetry.lock b/poetry.lock index 58c0ff6d8..790afb055 100644 --- a/poetry.lock +++ b/poetry.lock @@ -433,24 +433,6 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -[[package]] -name = "elasticsearch" -version = "7.13.4" -description = "Python client for Elasticsearch" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" - -[package.dependencies] -certifi = "*" -urllib3 = ">=1.21.1,<2" - -[package.extras] -async = ["aiohttp (>=3,<4)"] -develop = ["requests (>=2.0.0,<3.0.0)", "coverage", "mock", "pyyaml", "pytest", "pytest-cov", "sphinx (<1.7)", "sphinx-rtd-theme", "black", "jinja2"] -docs = ["sphinx (<1.7)", "sphinx-rtd-theme"] -requests = ["requests (>=2.4.0,<3.0.0)"] - [[package]] name = "entrypoints" version = "0.3" @@ -1165,6 +1147,23 @@ python-versions = ">=3.6," [package.dependencies] et-xmlfile = "*" +[[package]] +name = "opensearch-py" +version = "1.0.0" +description = "Python low-level client for OpenSearch" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" + +[package.dependencies] +certifi = "*" +urllib3 = ">=1.21.1,<2" + +[package.extras] +async = ["aiohttp (>=3,<4)"] +develop = ["requests (>=2.0.0,<3.0.0)", "coverage", "mock", "pyyaml", "pytest", "pytest-cov", "black", "jinja2"] +requests = ["requests (>=2.4.0,<3.0.0)"] + [[package]] name = "packaging" version = "21.0" @@ -2191,7 +2190,7 @@ sqlserver = ["pyodbc"] [metadata] lock-version = "1.1" python-versions = ">=3.6.2, <3.10" -content-hash = "dc43ca4a72073bdd82a0c36ec5b1b60eb68ae95055e885b190dc35b36a89137f" +content-hash = "5ae102e5d974439be05598ed53be1817216b1d958b89dfdd5ef1622c9708847a" [metadata.files] aiobotocore = [ @@ -2501,10 +2500,6 @@ docutils = [ {file = "docutils-0.17.1-py2.py3-none-any.whl", hash = "sha256:cf316c8370a737a022b72b56874f6602acf974a37a9fba42ec2876387549fc61"}, {file = "docutils-0.17.1.tar.gz", hash = "sha256:686577d2e4c32380bb50cbb22f575ed742d58168cee37e99117a854bcd88f125"}, ] -elasticsearch = [ - {file = "elasticsearch-7.13.4-py2.py3-none-any.whl", hash = "sha256:5920df0ab2630778680376d86bea349dc99860977eec9b6d2bd0860f337313f2"}, - {file = "elasticsearch-7.13.4.tar.gz", hash = "sha256:52dda85f76eeb85ec873bf9ffe0ba6849e544e591f66d4048a5e48016de268e0"}, -] entrypoints = [ {file = "entrypoints-0.3-py2.py3-none-any.whl", hash = "sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19"}, {file = "entrypoints-0.3.tar.gz", hash = "sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451"}, @@ -2932,6 +2927,10 @@ openpyxl = [ {file = "openpyxl-3.0.7-py2.py3-none-any.whl", hash = "sha256:46af4eaf201a89b610fcca177eed957635f88770a5462fb6aae4a2a52b0ff516"}, {file = "openpyxl-3.0.7.tar.gz", hash = "sha256:6456a3b472e1ef0facb1129f3c6ef00713cebf62e736cd7a75bcc3247432f251"}, ] +opensearch-py = [ + {file = "opensearch-py-1.0.0.tar.gz", hash = "sha256:fa952836cabfa1b2fb05f852edc1a373342494345e89fd52b7124daf4d296bb4"}, + {file = "opensearch_py-1.0.0-py2.py3-none-any.whl", hash = "sha256:17afebc25dc890b96c4e9ec8692dcfdb6842c028ce8c2d252e8f55c587960177"}, +] packaging = [ {file = "packaging-21.0-py3-none-any.whl", hash = "sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14"}, {file = "packaging-21.0.tar.gz", hash = 
"sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7"}, diff --git a/pyproject.toml b/pyproject.toml index 0071a8533..0ff7aa28f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,10 +48,10 @@ pyodbc = { version = "~4.0.30", optional = true } sphinx-bootstrap-theme = "^0.8.0" Sphinx = "^4.2.0" tox = "^3.24.4" -elasticsearch = "7.13.4" requests-aws4auth = "^1.1.1" jsonpath-ng = "^1.5.3" progressbar2 = "^3.53.3" +opensearch-py = "^1.0.0" [tool.poetry.extras] From 5c5d71780bcb11f59fa84016877bfad638329c1a Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Mon, 4 Oct 2021 23:57:30 -0400 Subject: [PATCH 36/41] [skip ci] opensearch filter_path default value --- awswrangler/opensearch/_read.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/awswrangler/opensearch/_read.py b/awswrangler/opensearch/_read.py index 015e47afa..ba345ab43 100644 --- a/awswrangler/opensearch/_read.py +++ b/awswrangler/opensearch/_read.py @@ -1,6 +1,6 @@ """Amazon OpenSearch Read Module (PRIVATE).""" -from typing import Any, Dict, List, Mapping, Optional, Union +from typing import Any, Collection, Dict, List, Mapping, Optional, Union import pandas as pd from opensearchpy import OpenSearch @@ -46,6 +46,7 @@ def search( search_body: Optional[Dict[str, Any]] = None, doc_type: Optional[str] = None, is_scroll: Optional[bool] = False, + filter_path: Optional[Union[str, Collection[str]]] = None, **kwargs: Any, ) -> pd.DataFrame: """Return results matching query DSL as pandas dataframe. @@ -67,6 +68,9 @@ def search( for example, for machine learning jobs. Because scroll search contexts consume a lot of memory, we suggest you don’t use the scroll operation for frequent user queries. + filter_path : Union[str, Collection[str]], optional + Use the filter_path parameter to reduce the size of the OpenSearch Service response \ +(default: ['hits.hits._id','hits.hits._source']) **kwargs : KEYWORD arguments forwarded to [opensearchpy.OpenSearch.search]\ (https://opensearch-py.readthedocs.io/en/latest/api.html#opensearchpy.OpenSearch.search) @@ -101,12 +105,18 @@ def search( if doc_type: kwargs["doc_type"] = doc_type + if filter_path is None: + filter_path = ["hits.hits._id", "hits.hits._source"] + if is_scroll: - documents_generator = scan(client, index=index, query=search_body, **kwargs) + if isinstance(filter_path, str): + filter_path = [filter_path] + filter_path = ["_scroll_id", "_shards"] + list(filter_path) # required for scroll + documents_generator = scan(client, index=index, query=search_body, filter_path=filter_path, **kwargs) documents = [_hit_to_row(doc) for doc in documents_generator] df = pd.DataFrame(documents) else: - response = client.search(index=index, body=search_body, **kwargs) + response = client.search(index=index, body=search_body, filter_path=filter_path, **kwargs) df = _search_response_to_df(response) return df From 152c4076ee906b398a7822be62e0778adb17844a Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Mon, 4 Oct 2021 23:58:22 -0400 Subject: [PATCH 37/41] [skip ci] opensearch tutorial --- tutorials/031 - OpenSearch.ipynb | 1575 ++++++++++++++++++++++++++++-- 1 file changed, 1504 insertions(+), 71 deletions(-) diff --git a/tutorials/031 - OpenSearch.ipynb b/tutorials/031 - OpenSearch.ipynb index f1b2b5ccb..afe254669 100644 --- a/tutorials/031 - OpenSearch.ipynb +++ b/tutorials/031 - OpenSearch.ipynb @@ -19,14 +19,30 @@ "metadata": {}, "source": [ "## Table of Contents\n", - "* [1. Create Indices](#2.-Create-Indices)\n", - "* [2. 
Write Indices](#3.-Write-Indices)\n", - "\t* [2.1 Writing from JSON file](#2.1-Writing-from-JSON-file)\n", - "\t* [2.2 Writing from CSV file](#2.2-Writing-from-CSV-file)\n", - "* [3. Search Indices](#1.-Search-Indices)\n", - "\t* [3.1 Search by DSL](#1.1-Search-by-DSL)\n", - "\t* [3.2 Search by SQL](#1.2-Search-by-SQL)\n", - "* [4. Delete Indices](#7.-Delete-Indices)\n" + "* [1. Initialize](#initialize)\n", + " * [Connect to your Amazon OpenSearch domain](#connect)\n", + " * [Enter your bucket name](#bucket)\n", + " * [Initialize sample data](#sample-data)\n", + "* [2. Indexing (load)](#indexing)\n", + "\t* [Index documents (no Pandas)](#index-documents)\n", + "\t* [Index json file](#index-json)\n", + " * [Index CSV](#index-csv)\n", + "* [3. Search](#search)\n", + "\t* [3.1 Search by DSL](#search-dsl)\n", + "\t* [3.2 Search by SQL](#search-sql)\n", + "* [4. Delete Indices](#delete-index)\n", + "* [5. Bonus - Prepare data and index from DataFrame](#bonus)\n", + "\t* [Prepare the data for indexing](#prepare-data)\n", + " * [Create index with mapping](#create-index-w-mapping)\n", + " * [Index dataframe](#index-df)\n", + " * [Execute geo query](#search-geo)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Initialize" ] }, { @@ -35,14 +51,14 @@ "metadata": {}, "outputs": [], "source": [ - "import awswrangler as wr\n" + "import awswrangler as wr" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Enter your domain endpoint:" + "### Connect to your Amazon OpenSearch domain" ] }, { @@ -51,14 +67,19 @@ "metadata": {}, "outputs": [], "source": [ - "client = wr.opensearch.connect(host='DOMAIN-ENDPOINT')" + "client = wr.opensearch.connect(\n", + " host='OPENSEARCH-ENDPOINT',\n", + "# username='FGAC-USERNAME(OPTIONAL)',\n", + "# password='FGAC-PASSWORD(OPTIONAL)'\n", + ")\n", + "client.info()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## 1. 
Create Indices" + "### Enter your bucket name" ] }, { @@ -67,134 +88,944 @@ "metadata": {}, "outputs": [], "source": [ - "response = wr.opensearch.create_index(\n", - " client=client,\n", - " index=\"tutorials\",\n", - " mappings={\n", - " \"properties\": {\n", - " \"id\": { \"type\" : \"integer\" },\n", - " \"name\": { \"type\" : \"string\" }\n", - " }\n", - " },\n", - " settings={\n", - " \"index\": {\n", - " \"number_of_shards\": 2,\n", - " \"number_of_replicas\": 1\n", - " }\n", - " }\n", - " )\n" + "bucket = 'BUCKET'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialize sample data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "sf_restaurants_inspections = [\n", + " {\n", + " \"inspection_id\": \"24936_20160609\",\n", + " \"business_address\": \"315 California St\",\n", + " \"business_city\": \"San Francisco\",\n", + " \"business_id\": \"24936\",\n", + " \"business_location\": {\"lon\": -122.400152, \"lat\": 37.793199},\n", + " \"business_name\": \"San Francisco Soup Company\",\n", + " \"business_postal_code\": \"94104\",\n", + " \"business_state\": \"CA\",\n", + " \"inspection_date\": \"2016-06-09T00:00:00.000\",\n", + " \"inspection_score\": 77,\n", + " \"inspection_type\": \"Routine - Unscheduled\",\n", + " \"risk_category\": \"Low Risk\",\n", + " \"violation_description\": \"Improper food labeling or menu misrepresentation\",\n", + " \"violation_id\": \"24936_20160609_103141\",\n", + " },\n", + " {\n", + " \"inspection_id\": \"60354_20161123\",\n", + " \"business_address\": \"10 Mason St\",\n", + " \"business_city\": \"San Francisco\",\n", + " \"business_id\": \"60354\",\n", + " \"business_location\": {\"lon\": -122.409061, \"lat\": 37.783527},\n", + " \"business_name\": \"Soup Unlimited\",\n", + " \"business_postal_code\": \"94102\",\n", + " \"business_state\": \"CA\",\n", + " \"inspection_date\": \"2016-11-23T00:00:00.000\",\n", + " \"inspection_type\": \"Routine\",\n", + " \"inspection_score\": 95,\n", + " },\n", + " {\n", + " \"inspection_id\": \"1797_20160705\",\n", + " \"business_address\": \"2872 24th St\",\n", + " \"business_city\": \"San Francisco\",\n", + " \"business_id\": \"1797\",\n", + " \"business_location\": {\"lon\": -122.409752, \"lat\": 37.752807},\n", + " \"business_name\": \"TIO CHILOS GRILL\",\n", + " \"business_postal_code\": \"94110\",\n", + " \"business_state\": \"CA\",\n", + " \"inspection_date\": \"2016-07-05T00:00:00.000\",\n", + " \"inspection_score\": 90,\n", + " \"inspection_type\": \"Routine - Unscheduled\",\n", + " \"risk_category\": \"Low Risk\",\n", + " \"violation_description\": \"Unclean nonfood contact surfaces\",\n", + " \"violation_id\": \"1797_20160705_103142\",\n", + " },\n", + " {\n", + " \"inspection_id\": \"66198_20160527\",\n", + " \"business_address\": \"1661 Tennessee St Suite 3B\",\n", + " \"business_city\": \"San Francisco Whard Restaurant\",\n", + " \"business_id\": \"66198\",\n", + " \"business_location\": {\"lon\": -122.388478, \"lat\": 37.75072},\n", + " \"business_name\": \"San Francisco Restaurant\",\n", + " \"business_postal_code\": \"94107\",\n", + " \"business_state\": \"CA\",\n", + " \"inspection_date\": \"2016-05-27T00:00:00.000\",\n", + " \"inspection_type\": \"Routine\",\n", + " \"inspection_score\": 56,\n", + " },\n", + " {\n", + " \"inspection_id\": \"5794_20160907\",\n", + " \"business_address\": \"2162 24th Ave\",\n", + " \"business_city\": \"San Francisco\",\n", + " \"business_id\": \"5794\",\n", + " 
\"business_location\": {\"lon\": -122.481299, \"lat\": 37.747228},\n", + " \"business_name\": \"Soup House\",\n", + " \"business_phone_number\": \"+14155752700\",\n", + " \"business_postal_code\": \"94116\",\n", + " \"business_state\": \"CA\",\n", + " \"inspection_date\": \"2016-09-07T00:00:00.000\",\n", + " \"inspection_score\": 96,\n", + " \"inspection_type\": \"Routine - Unscheduled\",\n", + " \"risk_category\": \"Low Risk\",\n", + " \"violation_description\": \"Unapproved or unmaintained equipment or utensils\",\n", + " \"violation_id\": \"5794_20160907_103144\",\n", + " },\n", + " \n", + " # duplicate record\n", + " {\n", + " \"inspection_id\": \"5794_20160907\",\n", + " \"business_address\": \"2162 24th Ave\",\n", + " \"business_city\": \"San Francisco\",\n", + " \"business_id\": \"5794\",\n", + " \"business_location\": {\"lon\": -122.481299, \"lat\": 37.747228},\n", + " \"business_name\": \"Soup-or-Salad\",\n", + " \"business_phone_number\": \"+14155752700\",\n", + " \"business_postal_code\": \"94116\",\n", + " \"business_state\": \"CA\",\n", + " \"inspection_date\": \"2016-09-07T00:00:00.000\",\n", + " \"inspection_score\": 96,\n", + " \"inspection_type\": \"Routine - Unscheduled\",\n", + " \"risk_category\": \"Low Risk\",\n", + " \"violation_description\": \"Unapproved or unmaintained equipment or utensils\",\n", + " \"violation_id\": \"5794_20160907_103144\",\n", + " },\n", + "]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## 2. Write Indices" + "## 2. Indexing (load)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 2.1 Write from JSON files" + "### Index documents (no Pandas)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Indexing: 100% (6/6)|####################################|Elapsed Time: 0:00:01" + ] + }, + { + "data": { + "text/plain": [ + "{'success': 6, 'errors': []}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# index documents w/o providing keys (_id is auto-generated)\n", + "wr.opensearch.index_documents(\n", + " client,\n", + " documents=sf_restaurants_inspections,\n", + " index=\"sf_restaurants_inspections\" \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idbusiness_nameinspection_idbusiness_location.lonbusiness_location.lat
0663dd72d-0da4-495b-b0ae-ed000105ae73TIO CHILOS GRILL1797_20160705-122.40975237.752807
1ff2f50f6-5415-4706-9bcb-af7c5eb0afa3Soup House5794_20160907-122.48129937.747228
2b9e8f6a2-8fd1-4660-b041-2997a1a80984San Francisco Soup Company24936_20160609-122.40015237.793199
356b352e6-102b-4eff-8296-7e1fb2459babSoup Unlimited60354_20161123-122.40906137.783527
46fec5411-f79a-48e4-be7b-e0e44d5ebbabSan Francisco Restaurant66198_20160527-122.38847837.750720
57ba4fb17-f9a9-49da-b90e-8b3553d6d97cSoup-or-Salad5794_20160907-122.48129937.747228
\n", + "
" + ], + "text/plain": [ + " _id business_name \\\n", + "0 663dd72d-0da4-495b-b0ae-ed000105ae73 TIO CHILOS GRILL \n", + "1 ff2f50f6-5415-4706-9bcb-af7c5eb0afa3 Soup House \n", + "2 b9e8f6a2-8fd1-4660-b041-2997a1a80984 San Francisco Soup Company \n", + "3 56b352e6-102b-4eff-8296-7e1fb2459bab Soup Unlimited \n", + "4 6fec5411-f79a-48e4-be7b-e0e44d5ebbab San Francisco Restaurant \n", + "5 7ba4fb17-f9a9-49da-b90e-8b3553d6d97c Soup-or-Salad \n", + "\n", + " inspection_id business_location.lon business_location.lat \n", + "0 1797_20160705 -122.409752 37.752807 \n", + "1 5794_20160907 -122.481299 37.747228 \n", + "2 24936_20160609 -122.400152 37.793199 \n", + "3 60354_20161123 -122.409061 37.783527 \n", + "4 66198_20160527 -122.388478 37.750720 \n", + "5 5794_20160907 -122.481299 37.747228 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# read all documents. There are total 6 documents\n", + "wr.opensearch.search(\n", + " client,\n", + " index=\"sf_restaurants_inspections\",\n", + " _source=[\"inspection_id\", \"business_name\", \"business_location\"]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Index json file" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "import pandas as pd\n", + "df = pd.DataFrame(sf_restaurants_inspections)\n", + "path = f\"s3://{bucket}/json/sf_restaurants_inspections.json\"\n", + "wr.s3.to_json(df, path,orient='records',lines=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Indexing: 100% (6/6)|####################################|Elapsed Time: 0:00:00" + ] + }, + { + "data": { + "text/plain": [ + "{'success': 6, 'errors': []}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# index json w/ providing keys\n", "wr.opensearch.index_json(\n", - " client=client,\n", - " path='s3://awswrangler-opensearch/dataload/doc1.json',\n", - " index='tutorials'\n", - " )\n" + " client,\n", + " path=path, # path can be s3 or local\n", + " index=\"sf_restaurants_inspections_dedup\",\n", + " id_keys=[\"inspection_id\"] # can be multiple fields. arg applicable to all index_* functions\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idbusiness_nameinspection_idbusiness_location.lonbusiness_location.lat
024936_20160609San Francisco Soup Company24936_20160609-122.40015237.793199
166198_20160527San Francisco Restaurant66198_20160527-122.38847837.750720
25794_20160907Soup-or-Salad5794_20160907-122.48129937.747228
360354_20161123Soup Unlimited60354_20161123-122.40906137.783527
41797_20160705TIO CHILOS GRILL1797_20160705-122.40975237.752807
\n", + "
" + ], + "text/plain": [ + " _id business_name inspection_id \\\n", + "0 24936_20160609 San Francisco Soup Company 24936_20160609 \n", + "1 66198_20160527 San Francisco Restaurant 66198_20160527 \n", + "2 5794_20160907 Soup-or-Salad 5794_20160907 \n", + "3 60354_20161123 Soup Unlimited 60354_20161123 \n", + "4 1797_20160705 TIO CHILOS GRILL 1797_20160705 \n", + "\n", + " business_location.lon business_location.lat \n", + "0 -122.400152 37.793199 \n", + "1 -122.388478 37.750720 \n", + "2 -122.481299 37.747228 \n", + "3 -122.409061 37.783527 \n", + "4 -122.409752 37.752807 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# now there are no duplicates. There are total 5 documents\n", + "wr.opensearch.search(\n", + " client,\n", + " index=\"sf_restaurants_inspections_dedup\",\n", + " _source=[\"inspection_id\", \"business_name\", \"business_location\"]\n", + " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 2.2 Write from CSV files" + "### Index CSV" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Indexing: 100% (1000/1000)|##############################|Elapsed Time: 0:00:00" + ] + }, + { + "data": { + "text/plain": [ + "{'success': 1000, 'errors': []}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "wr.opensearch.index_csv(\n", - " client=client,\n", - " path='s3://awswrangler-opensearch/dataload/doc1.csv',\n", - " index='tutorials'\n", - " )\n" + " client, \n", + " index=\"nyc_restaurants_inspections_sample\", \n", + " path='https://data.cityofnewyork.us/api/views/43nn-pn8j/rows.csv?accessType=DOWNLOAD', # index_csv supports local, s3 and url path\n", + " id_keys=[\"CAMIS\"],\n", + " pandas_kwargs={'na_filter': True, 'nrows': 1000}, # pandas.read_csv() args - https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html\n", + " bulk_size=500 # modify based on your cluster size\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idCAMISDBABOROBUILDINGSTREETZIPCODEPHONECUISINE DESCRIPTIONINSPECTION DATE...RECORD DATEINSPECTION TYPELatitudeLongitudeCommunity BoardCouncil DistrictCensus TractBINBBLNTA
04161042641610426GLOW THAI RESTAURANTBrooklyn71073 AVENUE11209.07187481920Thai02/26/2020...10/04/2021Cycle Inspection / Re-inspection40.633865-74.026798310.043.06800.03146519.03.058910e+09BK31
14081116240811162CARMINE'SManhattan2450BROADWAY10024.02123622200Italian05/28/2019...10/04/2021Cycle Inspection / Initial Inspection40.791168-73.974308107.06.017900.01033560.01.012380e+09MN12
25001211350012113TANGQueens196-50NORTHERN BOULEVARD11358.07182797080Korean08/16/2018...10/04/2021Cycle Inspection / Initial Inspection40.757850-73.784593411.019.0145101.04124565.04.055200e+09QN48
35001461850014618TOTTO RAMENManhattan248EAST 52 STREET10022.02124210052Japanese08/20/2018...10/04/2021Cycle Inspection / Re-inspection40.756596-73.968749106.04.09800.01038490.01.013250e+09MN19
45004578250045782OLLIE'S CHINESE RESTAURANTManhattan2705BROADWAY10025.02129323300Chinese10/21/2019...10/04/2021Cycle Inspection / Re-inspection40.799318-73.968440107.06.019100.01056562.01.018750e+09MN12
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " _id CAMIS DBA BORO BUILDING \\\n", + "0 41610426 41610426 GLOW THAI RESTAURANT Brooklyn 7107 \n", + "1 40811162 40811162 CARMINE'S Manhattan 2450 \n", + "2 50012113 50012113 TANG Queens 196-50 \n", + "3 50014618 50014618 TOTTO RAMEN Manhattan 248 \n", + "4 50045782 50045782 OLLIE'S CHINESE RESTAURANT Manhattan 2705 \n", + "\n", + " STREET ZIPCODE PHONE CUISINE DESCRIPTION \\\n", + "0 3 AVENUE 11209.0 7187481920 Thai \n", + "1 BROADWAY 10024.0 2123622200 Italian \n", + "2 NORTHERN BOULEVARD 11358.0 7182797080 Korean \n", + "3 EAST 52 STREET 10022.0 2124210052 Japanese \n", + "4 BROADWAY 10025.0 2129323300 Chinese \n", + "\n", + " INSPECTION DATE ... RECORD DATE INSPECTION TYPE \\\n", + "0 02/26/2020 ... 10/04/2021 Cycle Inspection / Re-inspection \n", + "1 05/28/2019 ... 10/04/2021 Cycle Inspection / Initial Inspection \n", + "2 08/16/2018 ... 10/04/2021 Cycle Inspection / Initial Inspection \n", + "3 08/20/2018 ... 10/04/2021 Cycle Inspection / Re-inspection \n", + "4 10/21/2019 ... 10/04/2021 Cycle Inspection / Re-inspection \n", + "\n", + " Latitude Longitude Community Board Council District Census Tract \\\n", + "0 40.633865 -74.026798 310.0 43.0 6800.0 \n", + "1 40.791168 -73.974308 107.0 6.0 17900.0 \n", + "2 40.757850 -73.784593 411.0 19.0 145101.0 \n", + "3 40.756596 -73.968749 106.0 4.0 9800.0 \n", + "4 40.799318 -73.968440 107.0 6.0 19100.0 \n", + "\n", + " BIN BBL NTA \n", + "0 3146519.0 3.058910e+09 BK31 \n", + "1 1033560.0 1.012380e+09 MN12 \n", + "2 4124565.0 4.055200e+09 QN48 \n", + "3 1038490.0 1.013250e+09 MN19 \n", + "4 1056562.0 1.018750e+09 MN12 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wr.opensearch.search(\n", + " client,\n", + " index=\"nyc_restaurants_inspections_sample\",\n", + " size=5\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## 3. Search Indices" + "## 3. Search\n", + "#### Search results are returned as Pandas DataFrame" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### 3.1 Search by DSL" + "### 3.1 Search by DSL" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idbusiness_nameinspection_idbusiness_location.lonbusiness_location.lat
0ff2f50f6-5415-4706-9bcb-af7c5eb0afa3Soup House5794_20160907-122.48129937.747228
17ba4fb17-f9a9-49da-b90e-8b3553d6d97cSoup-or-Salad5794_20160907-122.48129937.747228
2b9e8f6a2-8fd1-4660-b041-2997a1a80984San Francisco Soup Company24936_20160609-122.40015237.793199
356b352e6-102b-4eff-8296-7e1fb2459babSoup Unlimited60354_20161123-122.40906137.783527
\n", + "
" + ], + "text/plain": [ + " _id business_name \\\n", + "0 ff2f50f6-5415-4706-9bcb-af7c5eb0afa3 Soup House \n", + "1 7ba4fb17-f9a9-49da-b90e-8b3553d6d97c Soup-or-Salad \n", + "2 b9e8f6a2-8fd1-4660-b041-2997a1a80984 San Francisco Soup Company \n", + "3 56b352e6-102b-4eff-8296-7e1fb2459bab Soup Unlimited \n", + "\n", + " inspection_id business_location.lon business_location.lat \n", + "0 5794_20160907 -122.481299 37.747228 \n", + "1 5794_20160907 -122.481299 37.747228 \n", + "2 24936_20160609 -122.400152 37.793199 \n", + "3 60354_20161123 -122.409061 37.783527 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df = wr.opensearch.search(\n", - " client=client,\n", - " index='tutorials',\n", - " search_body={\n", - " \"query\": {\n", - " \"match_all\": {\n", - " }\n", - " }\n", - " }\n", - " )\n" + "# add a search query. search all soup businesses \n", + "wr.opensearch.search(\n", + " client,\n", + " index=\"sf_restaurants_inspections\",\n", + " _source=[\"inspection_id\", \"business_name\", \"business_location\"],\n", + " filter_path=[\"hits.hits._id\",\"hits.hits._source\"],\n", + " search_body={\n", + " \"query\": {\n", + " \"match\": {\n", + " \"business_name\": \"soup\"\n", + " }\n", + " }\n", + " }\n", + " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### 3.1 Search by SQL" + "### 3.1 Search by SQL" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_index_type_id_scorebusiness_nameinspection_score
0sf_restaurants_inspections_dedup_doc5794_20160907NoneSoup-or-Salad96
1sf_restaurants_inspections_dedup_doc60354_20161123NoneSoup Unlimited95
2sf_restaurants_inspections_dedup_doc24936_20160609NoneSan Francisco Soup Company77
\n", + "
" + ], + "text/plain": [ + " _index _type _id _score \\\n", + "0 sf_restaurants_inspections_dedup _doc 5794_20160907 None \n", + "1 sf_restaurants_inspections_dedup _doc 60354_20161123 None \n", + "2 sf_restaurants_inspections_dedup _doc 24936_20160609 None \n", + "\n", + " business_name inspection_score \n", + "0 Soup-or-Salad 96 \n", + "1 Soup Unlimited 95 \n", + "2 San Francisco Soup Company 77 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df = wr.opensearch.search_by_sql(\n", - " client=client,\n", - " sql_query='SELECT * FROM tutorials LIMIT 50'\n", - " )\n" + "wr.opensearch.search_by_sql(\n", + " client,\n", + " sql_query=\"\"\"SELECT business_name, inspection_score \n", + " FROM sf_restaurants_inspections_dedup\n", + " WHERE business_name LIKE '%soup%'\n", + " ORDER BY inspection_score DESC LIMIT 5\"\"\"\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## 4. Delete Index" + "## 4. Delete Indices" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 15, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false }, @@ -202,12 +1033,614 @@ "name": "#%%\n" } }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'acknowledged': True}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "response = wr.opensearch.delete_index(\n", + "wr.opensearch.delete_index(\n", " client=client,\n", - " index=\"tutorials\",\n", - " )" + " index=\"sf_restaurants_inspections\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Bonus - Prepare data and index from DataFrame" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For this exercise we'll use [DOHMH New York City Restaurant Inspection Results dataset](https://data.cityofnewyork.us/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/43nn-pn8j)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('https://data.cityofnewyork.us/api/views/43nn-pn8j/rows.csv?accessType=DOWNLOAD')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare the data for indexing" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# fields names underscore casing \n", + "df.columns = [col.lower().replace(' ', '_') for col in df.columns]\n", + "\n", + "# convert lon/lat to OpenSearch geo_point\n", + "df['business_location'] = \"POINT (\" + df.longitude.fillna('0').astype(str) + \" \" + df.latitude.fillna('0').astype(str) + \")\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create index with mapping" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'acknowledged': True,\n", + " 'shards_acknowledged': True,\n", + " 'index': 'nyc_restaurants_inspections'}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# delete index if exists\n", + "wr.opensearch.delete_index(\n", + " client=client,\n", + " index=\"nyc_restaurants\"\n", + " \n", + ")\n", + "\n", + "# use dynamic_template to map date fields\n", + "# define business_location as 
geo_point\n", + "wr.opensearch.create_index(\n", + " client=client,\n", + " index=\"nyc_restaurants_inspections\",\n", + " mappings={\n", + " \"dynamic_templates\" : [\n", + " {\n", + " \"dates\" : {\n", + " \"match\" : \"*date\",\n", + " \"mapping\" : {\n", + " \"type\" : \"date\",\n", + " \"format\" : 'MM/dd/yyyy'\n", + " }\n", + " }\n", + " }\n", + " ],\n", + " \"properties\": {\n", + " \"business_location\": {\n", + " \"type\": \"geo_point\"\n", + " }\n", + " }\n", + " } \n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "### Index dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Indexing: 100% (382655/382655)|##########################|Elapsed Time: 0:04:15" + ] + }, + { + "data": { + "text/plain": [ + "{'success': 382655, 'errors': []}" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wr.opensearch.index_df(\n", + " client,\n", + " df=df,\n", + " index=\"nyc_restaurants_inspections\",\n", + " id_keys=[\"camis\"],\n", + " bulk_size=1000\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Execute geo query\n", + "#### Sort restaurants by distance from Times-Square" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
camisdbaborobuildingstreetzipcodephonecuisine_descriptioninspection_dateaction...inspection_typelatitudelongitudecommunity_boardcouncil_districtcensus_tractbinbblntabusiness_location
041551304THE COUNTERManhattan7TIMES SQUARE10036.02129976801American12/22/2016Violations were cited in the following area(s)....Cycle Inspection / Initial Inspection40.755908-73.986681105.03.011300.01086069.01.009940e+09MN17POINT (-73.986680953809 40.755907817312)
150055665ANN INC CAFEManhattan7TIMES SQUARE10036.02125413287American12/11/2019Violations were cited in the following area(s)....Cycle Inspection / Initial Inspection40.755908-73.986681105.03.011300.01086069.01.009940e+09MN17POINT (-73.986680953809 40.755907817312)
250049552ERNST AND YOUNGManhattan5TIMES SQ10036.02127739994Coffee/Tea11/30/2018Violations were cited in the following area(s)....Cycle Inspection / Initial Inspection40.755702-73.987208105.03.011300.01024656.01.010130e+09MN17POINT (-73.987207980138 40.755702020307)
350014078RED LOBSTERManhattan5TIMES SQ10036.02127306706Seafood10/03/2017Violations were cited in the following area(s)....Cycle Inspection / Initial Inspection40.755702-73.987208105.03.011300.01024656.01.010130e+09MN17POINT (-73.987207980138 40.755702020307)
450015171NEW AMSTERDAM THEATERManhattan214WEST 42 STREET10036.02125825472American06/26/2018Violations were cited in the following area(s)....Cycle Inspection / Re-inspection40.756317-73.987652105.03.011300.01024660.01.010130e+09MN17POINT (-73.987651832547 40.756316895053)
..................................................................
9541552060PROSKAUER ROSEManhattan11TIMES SQUARE10036.02129695493American08/11/2017Violations were cited in the following area(s)....Administrative Miscellaneous / Initial Inspection40.756891-73.990023105.03.011300.01087978.01.010138e+09MN17POINT (-73.990023200823 40.756890780426)
9641242148GABBY O'HARA'SManhattan123WEST 39 STREET10018.02122788984Irish07/30/2019Violations were cited in the following area(s)....Cycle Inspection / Re-inspection40.753405-73.986602105.04.011300.01080611.01.008150e+09MN17POINT (-73.986602050292 40.753404587174)
9750095860THE TIMES EATERYManhattan6808 AVENUE10036.06463867787American02/28/2020Violations were cited in the following area(s)....Pre-permit (Operational) / Initial Inspection40.757991-73.989218105.03.011900.01024703.01.010150e+09MN17POINT (-73.989218092096 40.757991356019)
9850072861ITSUManhattan5307 AVENUE10018.09176393645Asian/Asian Fusion09/10/2018Violations were cited in the following area(s)....Pre-permit (Operational) / Initial Inspection40.753844-73.988551105.03.011300.01014485.01.007880e+09MN17POINT (-73.988551029682 40.753843959794)
9950068109LUKE'S LOBSTERManhattan1407BROADWAY10018.09174759192Seafood09/06/2017Violations were cited in the following area(s)....Pre-permit (Operational) / Initial Inspection40.753432-73.987151105.03.011300.01015265.01.008140e+09MN17POINT (-73.98715066791 40.753432097521)
\n", + "

100 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " camis dba boro building street \\\n", + "0 41551304 THE COUNTER Manhattan 7 TIMES SQUARE \n", + "1 50055665 ANN INC CAFE Manhattan 7 TIMES SQUARE \n", + "2 50049552 ERNST AND YOUNG Manhattan 5 TIMES SQ \n", + "3 50014078 RED LOBSTER Manhattan 5 TIMES SQ \n", + "4 50015171 NEW AMSTERDAM THEATER Manhattan 214 WEST 42 STREET \n", + ".. ... ... ... ... ... \n", + "95 41552060 PROSKAUER ROSE Manhattan 11 TIMES SQUARE \n", + "96 41242148 GABBY O'HARA'S Manhattan 123 WEST 39 STREET \n", + "97 50095860 THE TIMES EATERY Manhattan 680 8 AVENUE \n", + "98 50072861 ITSU Manhattan 530 7 AVENUE \n", + "99 50068109 LUKE'S LOBSTER Manhattan 1407 BROADWAY \n", + "\n", + " zipcode phone cuisine_description inspection_date \\\n", + "0 10036.0 2129976801 American 12/22/2016 \n", + "1 10036.0 2125413287 American 12/11/2019 \n", + "2 10036.0 2127739994 Coffee/Tea 11/30/2018 \n", + "3 10036.0 2127306706 Seafood 10/03/2017 \n", + "4 10036.0 2125825472 American 06/26/2018 \n", + ".. ... ... ... ... \n", + "95 10036.0 2129695493 American 08/11/2017 \n", + "96 10018.0 2122788984 Irish 07/30/2019 \n", + "97 10036.0 6463867787 American 02/28/2020 \n", + "98 10018.0 9176393645 Asian/Asian Fusion 09/10/2018 \n", + "99 10018.0 9174759192 Seafood 09/06/2017 \n", + "\n", + " action ... \\\n", + "0 Violations were cited in the following area(s). ... \n", + "1 Violations were cited in the following area(s). ... \n", + "2 Violations were cited in the following area(s). ... \n", + "3 Violations were cited in the following area(s). ... \n", + "4 Violations were cited in the following area(s). ... \n", + ".. ... ... \n", + "95 Violations were cited in the following area(s). ... \n", + "96 Violations were cited in the following area(s). ... \n", + "97 Violations were cited in the following area(s). ... \n", + "98 Violations were cited in the following area(s). ... \n", + "99 Violations were cited in the following area(s). ... \n", + "\n", + " inspection_type latitude longitude \\\n", + "0 Cycle Inspection / Initial Inspection 40.755908 -73.986681 \n", + "1 Cycle Inspection / Initial Inspection 40.755908 -73.986681 \n", + "2 Cycle Inspection / Initial Inspection 40.755702 -73.987208 \n", + "3 Cycle Inspection / Initial Inspection 40.755702 -73.987208 \n", + "4 Cycle Inspection / Re-inspection 40.756317 -73.987652 \n", + ".. ... ... ... \n", + "95 Administrative Miscellaneous / Initial Inspection 40.756891 -73.990023 \n", + "96 Cycle Inspection / Re-inspection 40.753405 -73.986602 \n", + "97 Pre-permit (Operational) / Initial Inspection 40.757991 -73.989218 \n", + "98 Pre-permit (Operational) / Initial Inspection 40.753844 -73.988551 \n", + "99 Pre-permit (Operational) / Initial Inspection 40.753432 -73.987151 \n", + "\n", + " community_board council_district census_tract bin bbl \\\n", + "0 105.0 3.0 11300.0 1086069.0 1.009940e+09 \n", + "1 105.0 3.0 11300.0 1086069.0 1.009940e+09 \n", + "2 105.0 3.0 11300.0 1024656.0 1.010130e+09 \n", + "3 105.0 3.0 11300.0 1024656.0 1.010130e+09 \n", + "4 105.0 3.0 11300.0 1024660.0 1.010130e+09 \n", + ".. ... ... ... ... ... 
\n", + "95 105.0 3.0 11300.0 1087978.0 1.010138e+09 \n", + "96 105.0 4.0 11300.0 1080611.0 1.008150e+09 \n", + "97 105.0 3.0 11900.0 1024703.0 1.010150e+09 \n", + "98 105.0 3.0 11300.0 1014485.0 1.007880e+09 \n", + "99 105.0 3.0 11300.0 1015265.0 1.008140e+09 \n", + "\n", + " nta business_location \n", + "0 MN17 POINT (-73.986680953809 40.755907817312) \n", + "1 MN17 POINT (-73.986680953809 40.755907817312) \n", + "2 MN17 POINT (-73.987207980138 40.755702020307) \n", + "3 MN17 POINT (-73.987207980138 40.755702020307) \n", + "4 MN17 POINT (-73.987651832547 40.756316895053) \n", + ".. ... ... \n", + "95 MN17 POINT (-73.990023200823 40.756890780426) \n", + "96 MN17 POINT (-73.986602050292 40.753404587174) \n", + "97 MN17 POINT (-73.989218092096 40.757991356019) \n", + "98 MN17 POINT (-73.988551029682 40.753843959794) \n", + "99 MN17 POINT (-73.98715066791 40.753432097521) \n", + "\n", + "[100 rows x 27 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wr.opensearch.search(\n", + " client,\n", + " index=\"nyc_restaurants_inspections\",\n", + " filter_path=[\"hits.hits._source\"],\n", + " size=100,\n", + " search_body={\n", + " \"query\": {\n", + " \"match_all\": {}\n", + " },\n", + " \"sort\": [\n", + " {\n", + " \"_geo_distance\": {\n", + " \"business_location\": { # Times-Square - https://geojson.io/#map=16/40.7563/-73.9862\n", + " \"lat\": 40.75613228383523,\n", + " \"lon\": -73.9865791797638\n", + " },\n", + " \"order\": \"asc\"\n", + " }\n", + " }\n", + " ]\n", + " }\n", + ")" ] } ], @@ -227,7 +1660,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.7.7" } }, "nbformat": 4, From 53dff4b9198077bfeec6d73e45cec2c9eb6cd75c Mon Sep 17 00:00:00 2001 From: Abdel Jaidi Date: Tue, 5 Oct 2021 17:15:22 +0100 Subject: [PATCH 38/41] Minor - Pylint --- awswrangler/opensearch/_read.py | 2 +- awswrangler/opensearch/_utils.py | 2 +- awswrangler/opensearch/_write.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/awswrangler/opensearch/_read.py b/awswrangler/opensearch/_read.py index ba345ab43..8f2ef95c1 100644 --- a/awswrangler/opensearch/_read.py +++ b/awswrangler/opensearch/_read.py @@ -15,7 +15,7 @@ def _resolve_fields(row: Mapping[str, Any]) -> Mapping[str, Any]: if isinstance(row[field], dict): nested_fields = _resolve_fields(row[field]) for n_field, val in nested_fields.items(): - fields["{}.{}".format(field, n_field)] = val + fields[f"{field}.{n_field}"] = val else: fields[field] = row[field] return fields diff --git a/awswrangler/opensearch/_utils.py b/awswrangler/opensearch/_utils.py index b2a139cbf..a48b0eadc 100644 --- a/awswrangler/opensearch/_utils.py +++ b/awswrangler/opensearch/_utils.py @@ -75,7 +75,7 @@ def connect( valid_ports = {80, 443} if port not in valid_ports: - raise ValueError("results: port must be one of %r." 
% valid_ports) + raise ValueError(f"results: port must be one of {valid_ports}") if username and password: http_auth = (username, password) diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index 5bb081909..62e9d146e 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -84,7 +84,7 @@ def _deserialize(v: Any) -> Any: def _file_line_generator(path: str, is_json: bool = False) -> Generator[Any, None, None]: - with open(path) as fp: + with open(path) as fp: # pylint: disable=W1514 for line in fp: if is_json: yield json.loads(line) From c6e6d8051ab026550a8c98ec9e134a41fb1334a8 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Tue, 5 Oct 2021 14:47:39 -0400 Subject: [PATCH 39/41] [skip ci] opensearch: pylint f-string and file open encoding --- awswrangler/opensearch/_read.py | 2 +- awswrangler/opensearch/_utils.py | 2 +- awswrangler/opensearch/_write.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/awswrangler/opensearch/_read.py b/awswrangler/opensearch/_read.py index ba345ab43..8f2ef95c1 100644 --- a/awswrangler/opensearch/_read.py +++ b/awswrangler/opensearch/_read.py @@ -15,7 +15,7 @@ def _resolve_fields(row: Mapping[str, Any]) -> Mapping[str, Any]: if isinstance(row[field], dict): nested_fields = _resolve_fields(row[field]) for n_field, val in nested_fields.items(): - fields["{}.{}".format(field, n_field)] = val + fields[f"{field}.{n_field}"] = val else: fields[field] = row[field] return fields diff --git a/awswrangler/opensearch/_utils.py b/awswrangler/opensearch/_utils.py index b2a139cbf..6c5ef99c7 100644 --- a/awswrangler/opensearch/_utils.py +++ b/awswrangler/opensearch/_utils.py @@ -75,7 +75,7 @@ def connect( valid_ports = {80, 443} if port not in valid_ports: - raise ValueError("results: port must be one of %r." % valid_ports) + raise ValueError(f"results: port must be one of {valid_ports}.") if username and password: http_auth = (username, password) diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index 5bb081909..3ea466693 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -84,7 +84,7 @@ def _deserialize(v: Any) -> Any: def _file_line_generator(path: str, is_json: bool = False) -> Generator[Any, None, None]: - with open(path) as fp: + with open(path, encoding="utf-8") as fp: for line in fp: if is_json: yield json.loads(line) From 29f892c3c9ed2764b5a3045b8cbd8cfcc6ea87c2 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Tue, 5 Oct 2021 20:38:39 -0400 Subject: [PATCH 40/41] opensearch: add to CONTRIBUTING.md --- CONTRIBUTING.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d3420ade6..e898ec21e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -215,6 +215,10 @@ or ``./deploy-base.sh`` ``./deploy-databases.sh`` +* [OPTIONAL] Deploy the Cloudformation template `opensearch.yaml` (if you need to test Amazon OpenSearch Service). This step could take about 15 minutes to deploy. + +``./deploy-opensearch.sh`` + * Go to the `EC2 -> SecurityGroups` console, open the `aws-data-wrangler-*` security group and configure to accept your IP from any TCP port. 
- Alternatively run: @@ -244,7 +248,7 @@ or ``pytest -n 8 tests/test_db.py`` -* To run all data lake test functions for all python versions (Only if Amazon QuickSight is activated): +* To run all data lake test functions for all python versions (Only if Amazon QuickSight is activated and Amazon OpenSearch template is deployed): ``./test.sh`` From 827c3bf4c79a71352a14b8260f4057e2b363c051 Mon Sep 17 00:00:00 2001 From: Assaf Mentzer Date: Wed, 6 Oct 2021 11:39:22 -0400 Subject: [PATCH 41/41] opensearch: update aws-cdk packages to have the same minimum version --- test_infra/poetry.lock | 2 +- test_infra/pyproject.toml | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/test_infra/poetry.lock b/test_infra/poetry.lock index c4e5df69b..aa17ff35f 100644 --- a/test_infra/poetry.lock +++ b/test_infra/poetry.lock @@ -755,7 +755,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes [metadata] lock-version = "1.1" python-versions = ">=3.6.2, <3.10" -content-hash = "7fe703d54794d69aab0dd6ad5b4017c43defbff76ed9a3fe10e243c422adfea6" +content-hash = "6d95fccb052c85375178aa3ade72de9e4ee87c009d7e067dd7d4120c23ded9f5" [metadata.files] attrs = [ diff --git a/test_infra/pyproject.toml b/test_infra/pyproject.toml index 761c315d7..02e0241d8 100644 --- a/test_infra/pyproject.toml +++ b/test_infra/pyproject.toml @@ -7,15 +7,15 @@ license = "Apache License 2.0" [tool.poetry.dependencies] python = ">=3.6.2, <3.10" -"aws-cdk.core" = "^1.115.0" -"aws-cdk.aws-ec2" = "^1.115.0" -"aws-cdk.aws-glue" = "^1.115.0" -"aws-cdk.aws-iam" = "^1.115.0" -"aws-cdk.aws-kms" = "^1.115.0" -"aws-cdk.aws-logs" = "^1.115.0" -"aws-cdk.aws-s3" = "^1.115.0" -"aws-cdk.aws-redshift" = "^1.115.0" -"aws-cdk.aws-rds" = "^1.115.0" -"aws-cdk.aws-secretsmanager" = "^1.115.0" -"aws-cdk.aws-ssm" = "^1.115.0" +"aws-cdk.core" = "^1.124.0" +"aws-cdk.aws-ec2" = "^1.124.0" +"aws-cdk.aws-glue" = "^1.124.0" +"aws-cdk.aws-iam" = "^1.124.0" +"aws-cdk.aws-kms" = "^1.124.0" +"aws-cdk.aws-logs" = "^1.124.0" +"aws-cdk.aws-s3" = "^1.124.0" +"aws-cdk.aws-redshift" = "^1.124.0" +"aws-cdk.aws-rds" = "^1.124.0" +"aws-cdk.aws-secretsmanager" = "^1.124.0" +"aws-cdk.aws-ssm" = "^1.124.0" "aws-cdk.aws-opensearchservice" = "^1.124.0"
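
---

Taken together, the patch series above leaves `awswrangler.opensearch` with a small end-to-end surface: connect to a domain, create an index, bulk-index a DataFrame, query it, and clean up. The sketch below simply strings together the calls exercised in the tutorial notebook; the domain endpoint, index name, and sample data are placeholders, not values from the patches, and the argument names mirror the notebook cells above.

```python
import awswrangler as wr
import pandas as pd

# Placeholder Amazon OpenSearch Service domain endpoint (assumption).
client = wr.opensearch.connect(host="my-test-domain.us-east-1.es.amazonaws.com")

# Create an index with an explicit mapping (same shape as the tutorial's create_index cell).
wr.opensearch.create_index(
    client=client,
    index="sample_index",
    mappings={"properties": {"age": {"type": "integer"}}},
)

# Bulk-index a DataFrame; id_keys controls which column(s) form the document _id.
df = pd.DataFrame({"user_id": [1, 2], "name": ["foo", "bar"], "age": [25, 32]})
wr.opensearch.index_df(client, df=df, index="sample_index", id_keys=["user_id"])

# Query with SQL, then drop the index.
results = wr.opensearch.search_by_sql(
    client,
    sql_query="SELECT name, age FROM sample_index LIMIT 10",
)
wr.opensearch.delete_index(client=client, index="sample_index")
```

Note the connect-once, pass-the-client-everywhere design: every read and write call takes the low-level client returned by `wr.opensearch.connect`, so host resolution, port validation, and credentials live in a single place.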