From c35ac1cb27620cb8a11a4b4a6cc8f2b0ff307624 Mon Sep 17 00:00:00 2001 From: Anton Kukushkin Date: Thu, 11 May 2023 16:15:25 +0100 Subject: [PATCH 1/9] fix: Return previous .do_write() implementation for backward compatibility with ray 2.0 --- .../pandas_file_based_datasource.py | 79 ++++++++++++++----- 1 file changed, 59 insertions(+), 20 deletions(-) diff --git a/awswrangler/distributed/ray/datasources/pandas_file_based_datasource.py b/awswrangler/distributed/ray/datasources/pandas_file_based_datasource.py index 979b9aa51..7baa70508 100644 --- a/awswrangler/distributed/ray/datasources/pandas_file_based_datasource.py +++ b/awswrangler/distributed/ray/datasources/pandas_file_based_datasource.py @@ -7,7 +7,7 @@ import pyarrow import ray from ray.data._internal.delegating_block_builder import DelegatingBlockBuilder -from ray.data.block import Block, BlockAccessor +from ray.data.block import Block, BlockAccessor, BlockMetadata from ray.data.datasource.datasource import WriteResult from ray.data.datasource.file_based_datasource import ( BlockWritePathProvider, @@ -64,34 +64,73 @@ def __init__(self) -> None: def _read_file(self, f: pyarrow.NativeFile, path: str, **reader_args: Any) -> pd.DataFrame: raise NotImplementedError() - def do_write( + def do_write( # type: ignore[override] # pylint: disable=arguments-differ self, blocks: List[ObjectRef[pd.DataFrame]], - *args: Any, - **kwargs: Any, + metadata: List[BlockMetadata], + path: str, + dataset_uuid: str, + filesystem: Optional[pyarrow.fs.FileSystem] = None, + try_create_dir: bool = True, + open_stream_args: Optional[Dict[str, Any]] = None, + block_path_provider: BlockWritePathProvider = DefaultBlockWritePathProvider(), + write_args_fn: Callable[[], Dict[str, Any]] = lambda: {}, + _block_udf: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None, + ray_remote_args: Optional[Dict[str, Any]] = None, + s3_additional_kwargs: Optional[Dict[str, str]] = None, + pandas_kwargs: Optional[Dict[str, Any]] = None, + compression: Optional[str] = None, + mode: str = "wb", + **write_args: Any, ) -> List[ObjectRef[WriteResult]]: - """Create and return write tasks for a file-based datasource. + """Create and return write tasks for a file-based datasource.""" + _write_block_to_file = self._write_block - Note: In Ray 2.4+ write semantics has changed. datasource.do_write() was deprecated in favour of - datasource.write() that represents a single write task and enables it to be captured by execution - plan allowing query optimisation ("fuse" with other operations). The change is not backward-compatible - with earlier versions still attempting to call do_write(). 
- """ - write_tasks = [] - path: str = kwargs.pop("path") - dataset_uuid: str = kwargs.pop("dataset_uuid") - ray_remote_args: Dict[str, Any] = kwargs.pop("ray_remote_args") or {} + if ray_remote_args is None: + ray_remote_args = {} + + if pandas_kwargs is None: + pandas_kwargs = {} + + if not compression: + compression = pandas_kwargs.get("compression") + + def write_block(write_path: str, block: pd.DataFrame) -> str: + if _block_udf is not None: + block = _block_udf(block) - _write = ray_remote(**ray_remote_args)(self.write) + with open_s3_object( + path=write_path, + mode=mode, + use_threads=False, + s3_additional_kwargs=s3_additional_kwargs, + encoding=write_args.get("encoding"), + newline=write_args.get("newline"), + ) as f: + _write_block_to_file( + f, + BlockAccessor.for_block(block), + pandas_kwargs=pandas_kwargs, + compression=compression, + **write_args, + ) + return write_path + + write_block_fn = ray_remote(**ray_remote_args)(write_block) + + file_suffix = self._get_file_suffix(self._FILE_EXTENSION, compression) + write_tasks = [] for block_idx, block in enumerate(blocks): - write_task = _write( - [block], - TaskContext(task_idx=block_idx), + write_path = block_path_provider( path, - dataset_uuid, - **kwargs, + filesystem=filesystem, + dataset_uuid=dataset_uuid, + block=block, + block_index=block_idx, + file_format=file_suffix, ) + write_task = write_block_fn(write_path, block) write_tasks.append(write_task) return write_tasks From a3ab1f520a2e52d752cfe458330db8d456effae9 Mon Sep 17 00:00:00 2001 From: Anton Kukushkin Date: Thu, 11 May 2023 16:17:32 +0100 Subject: [PATCH 2/9] fix: add docstrings --- .../ray/datasources/pandas_file_based_datasource.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/awswrangler/distributed/ray/datasources/pandas_file_based_datasource.py b/awswrangler/distributed/ray/datasources/pandas_file_based_datasource.py index 7baa70508..2b8b4f539 100644 --- a/awswrangler/distributed/ray/datasources/pandas_file_based_datasource.py +++ b/awswrangler/distributed/ray/datasources/pandas_file_based_datasource.py @@ -83,7 +83,13 @@ def do_write( # type: ignore[override] # pylint: disable=arguments-differ mode: str = "wb", **write_args: Any, ) -> List[ObjectRef[WriteResult]]: - """Create and return write tasks for a file-based datasource.""" + """Create and return write tasks for a file-based datasource. + + Note: In Ray 2.4+ write semantics has changed. datasource.do_write() was deprecated in favour of + datasource.write() that represents a single write task and enables it to be captured by execution + plan allowing query optimisation ("fuse" with other operations). The change is not backward-compatible + with earlier versions still attempting to call do_write(). 
+ """ _write_block_to_file = self._write_block if ray_remote_args is None: From 67ffebf2b87fa31a0917431c20c02969ba2dd2ec Mon Sep 17 00:00:00 2001 From: Anton Kukushkin Date: Fri, 12 May 2023 13:38:51 +0100 Subject: [PATCH 3/9] tests: Temp skip the two tests causing recurrent issues --- tests/load/test_databases.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/load/test_databases.py b/tests/load/test_databases.py index eb49bf4e4..ca73f84e4 100644 --- a/tests/load/test_databases.py +++ b/tests/load/test_databases.py @@ -38,6 +38,7 @@ def test_timestream_write( assert df["counter"].iloc[0] == 126_000 +@pytest.mark.skip(reason="Temporary skip due to boto3 version mismatch on the cluster image") @pytest.mark.parametrize("benchmark_time", [90]) def test_timestream_batch_load( benchmark_time: int, timestream_database_and_table: str, df_timestream: pd.DataFrame, path: str, path2: str, request @@ -69,6 +70,7 @@ def test_timestream_batch_load( assert df["counter"].iloc[0] == 126_000 +@pytest.mark.skip(reason="Temporary skip due to Ray cluster loosing connectivity issue") @pytest.mark.parametrize("benchmark_time_copy", [150]) @pytest.mark.parametrize("benchmark_time_unload", [150]) def test_redshift_copy_unload( From 311abb7b28c7932cced733116125cc26bf9cdead Mon Sep 17 00:00:00 2001 From: Anton Kukushkin Date: Fri, 12 May 2023 15:47:35 +0100 Subject: [PATCH 4/9] [skip ci] Revert "tests: Temp skip the two tests causing recurrent issues" This reverts commit 67ffebf2b87fa31a0917431c20c02969ba2dd2ec. --- tests/load/test_databases.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/load/test_databases.py b/tests/load/test_databases.py index ca73f84e4..eb49bf4e4 100644 --- a/tests/load/test_databases.py +++ b/tests/load/test_databases.py @@ -38,7 +38,6 @@ def test_timestream_write( assert df["counter"].iloc[0] == 126_000 -@pytest.mark.skip(reason="Temporary skip due to boto3 version mismatch on the cluster image") @pytest.mark.parametrize("benchmark_time", [90]) def test_timestream_batch_load( benchmark_time: int, timestream_database_and_table: str, df_timestream: pd.DataFrame, path: str, path2: str, request @@ -70,7 +69,6 @@ def test_timestream_batch_load( assert df["counter"].iloc[0] == 126_000 -@pytest.mark.skip(reason="Temporary skip due to Ray cluster loosing connectivity issue") @pytest.mark.parametrize("benchmark_time_copy", [150]) @pytest.mark.parametrize("benchmark_time_unload", [150]) def test_redshift_copy_unload( From 96f7a518cb5cfe0cea8883d23b8e59cffdf38dd5 Mon Sep 17 00:00:00 2001 From: Anton Kukushkin Date: Fri, 12 May 2023 15:55:48 +0100 Subject: [PATCH 5/9] [skip ci] mypy --- .../distributed/ray/datasources/pandas_file_based_datasource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/awswrangler/distributed/ray/datasources/pandas_file_based_datasource.py b/awswrangler/distributed/ray/datasources/pandas_file_based_datasource.py index 2b8b4f539..a6ebd81cb 100644 --- a/awswrangler/distributed/ray/datasources/pandas_file_based_datasource.py +++ b/awswrangler/distributed/ray/datasources/pandas_file_based_datasource.py @@ -64,7 +64,7 @@ def __init__(self) -> None: def _read_file(self, f: pyarrow.NativeFile, path: str, **reader_args: Any) -> pd.DataFrame: raise NotImplementedError() - def do_write( # type: ignore[override] # pylint: disable=arguments-differ + def do_write( # pylint: disable=arguments-differ self, blocks: List[ObjectRef[pd.DataFrame]], metadata: List[BlockMetadata], From 110c10d3b783d23ff6279a21e45a9b87a5feebb7 Mon Sep 17 00:00:00 2001 From: 
Anton Kukushkin Date: Fri, 12 May 2023 16:11:06 +0100 Subject: [PATCH 6/9] [skip ci] Temp skip timestream due to boto3 version mismatch on the cluster --- tests/load/test_databases.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/load/test_databases.py b/tests/load/test_databases.py index eb49bf4e4..afe08d868 100644 --- a/tests/load/test_databases.py +++ b/tests/load/test_databases.py @@ -38,6 +38,7 @@ def test_timestream_write( assert df["counter"].iloc[0] == 126_000 +@pytest.mark.skip(reason="Temporary skip due to boto3 version mismatch on the cluster image") @pytest.mark.parametrize("benchmark_time", [90]) def test_timestream_batch_load( benchmark_time: int, timestream_database_and_table: str, df_timestream: pd.DataFrame, path: str, path2: str, request From 0a1c00665f48458ce84f75b819eb3811a1bb85f1 Mon Sep 17 00:00:00 2001 From: Anton Kukushkin Date: Mon, 15 May 2023 12:46:37 +0100 Subject: [PATCH 7/9] Un-skip timestream batch load test case --- tests/load/test_databases.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/load/test_databases.py b/tests/load/test_databases.py index afe08d868..eb49bf4e4 100644 --- a/tests/load/test_databases.py +++ b/tests/load/test_databases.py @@ -38,7 +38,6 @@ def test_timestream_write( assert df["counter"].iloc[0] == 126_000 -@pytest.mark.skip(reason="Temporary skip due to boto3 version mismatch on the cluster image") @pytest.mark.parametrize("benchmark_time", [90]) def test_timestream_batch_load( benchmark_time: int, timestream_database_and_table: str, df_timestream: pd.DataFrame, path: str, path2: str, request From 127701a4014e6df8a13a0d2ef6d250cb54f81276 Mon Sep 17 00:00:00 2001 From: Anton Kukushkin Date: Mon, 15 May 2023 13:45:29 +0100 Subject: [PATCH 8/9] [skip ci] Test infa - up-cap Python version & update poetry.lock --- test_infra/poetry.lock | 262 +++++++++++++++++++++++--------------- test_infra/pyproject.toml | 2 +- 2 files changed, 162 insertions(+), 102 deletions(-) diff --git a/test_infra/poetry.lock b/test_infra/poetry.lock index 73b57c3ba..76304d943 100644 --- a/test_infra/poetry.lock +++ b/test_infra/poetry.lock @@ -1,38 +1,31 @@ -# This file is automatically @generated by Poetry and should not be changed by hand. 
- [[package]] name = "attrs" -version = "22.2.0" +version = "23.1.0" description = "Classes Without Boilerplate" category = "main" optional = false -python-versions = ">=3.6" -files = [ - {file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"}, - {file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"}, -] +python-versions = ">=3.7" + +[package.dependencies] +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} [package.extras] -cov = ["attrs[tests]", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"] -dev = ["attrs[docs,tests]"] -docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope.interface"] -tests = ["attrs[tests-no-zope]", "zope.interface"] -tests-no-zope = ["cloudpickle", "cloudpickle", "hypothesis", "hypothesis", "mypy (>=0.971,<0.990)", "mypy (>=0.971,<0.990)", "pympler", "pympler", "pytest (>=4.3.0)", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-mypy-plugins", "pytest-xdist[psutil]", "pytest-xdist[psutil]"] +cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] +dev = ["attrs[docs,tests]", "pre-commit"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] +tests = ["attrs[tests-no-zope]", "zope-interface"] +tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] [[package]] name = "aws-cdk-asset-awscli-v1" -version = "2.2.69" +version = "2.2.173" description = "A library that contains the AWS CLI for use in Lambda Layers" category = "main" optional = false python-versions = "~=3.7" -files = [ - {file = "aws-cdk.asset-awscli-v1-2.2.69.tar.gz", hash = "sha256:a076a29075ce863d2a3cae2036f31d2317e5dc52f97780d828bf67d37a3fb523"}, - {file = "aws_cdk.asset_awscli_v1-2.2.69-py3-none-any.whl", hash = "sha256:e53d352bc50c566bbbbcbb85b17f98432af853eff16559570c6d5d6be58bb44e"}, -] [package.dependencies] -jsii = ">=1.75.0,<2.0.0" +jsii = ">=1.81.0,<2.0.0" publication = ">=0.0.3" typeguard = ">=2.13.3,<2.14.0" @@ -43,10 +36,6 @@ description = "A library that contains kubectl for use in Lambda Layers" category = "main" optional = false python-versions = "~=3.7" -files = [ - {file = "aws-cdk.asset-kubectl-v20-2.1.1.tar.gz", hash = "sha256:9834cdb150c5590aea4e5eba6de2a89b4c60617451181c524810c5a75154565c"}, - {file = "aws_cdk.asset_kubectl_v20-2.1.1-py3-none-any.whl", hash = "sha256:a2fad1a5a35a94a465efe60859f91e45dacc33261fb9bbf1cf9bbc6e2f70e9d6"}, -] [package.dependencies] jsii = ">=1.70.0,<2.0.0" @@ -55,96 +44,76 @@ typeguard = ">=2.13.3,<2.14.0" [[package]] name = "aws-cdk-asset-node-proxy-agent-v5" -version = "2.0.58" +version = "2.0.146" description = "@aws-cdk/asset-node-proxy-agent-v5" category = "main" optional = false python-versions = "~=3.7" -files = [ - {file = "aws-cdk.asset-node-proxy-agent-v5-2.0.58.tar.gz", hash = "sha256:2b045e6d1ec8f3290ab32b839ed26fddd691dfa2cb8868fc2d641a3c9b768cad"}, - {file = "aws_cdk.asset_node_proxy_agent_v5-2.0.58-py3-none-any.whl", hash = "sha256:e9b913a613342f277505eb32ac268d699ebf66624940509b2e33453392f6f90a"}, -] [package.dependencies] -jsii = ">=1.75.0,<2.0.0" +jsii = ">=1.81.0,<2.0.0" publication = ">=0.0.3" typeguard = ">=2.13.3,<2.14.0" [[package]] name = "aws-cdk-aws-glue-alpha" -version = "2.65.0a0" +version = "2.79.1a0" description = "The CDK Construct Library for AWS::Glue" category = 
"main" optional = false python-versions = "~=3.7" -files = [ - {file = "aws-cdk.aws-glue-alpha-2.65.0a0.tar.gz", hash = "sha256:c36f4bc149115bd2798b52df4fe1106f5d2b829f121ece3a913d48ed8953b673"}, - {file = "aws_cdk.aws_glue_alpha-2.65.0a0-py3-none-any.whl", hash = "sha256:5df1bd87e0c4fd3d0735b3f2d6f5cfbba5b6e9d2e3821ba661f0682d519647af"}, -] [package.dependencies] -aws-cdk-lib = ">=2.65.0,<3.0.0" +aws-cdk-lib = "2.79.1" constructs = ">=10.0.0,<11.0.0" -jsii = ">=1.74.0,<2.0.0" +jsii = ">=1.80.0,<2.0.0" publication = ">=0.0.3" typeguard = ">=2.13.3,<2.14.0" [[package]] name = "aws-cdk-aws-neptune-alpha" -version = "2.65.0a0" +version = "2.79.1a0" description = "The CDK Construct Library for AWS::Neptune" category = "main" optional = false python-versions = "~=3.7" -files = [ - {file = "aws-cdk.aws-neptune-alpha-2.65.0a0.tar.gz", hash = "sha256:1ddd153c0304392e3aa816394de9a517e33b0e1460e10e7f031459ce4c864b85"}, - {file = "aws_cdk.aws_neptune_alpha-2.65.0a0-py3-none-any.whl", hash = "sha256:6f383d13821d01315c9f8728ac016c863d2cba837edea9216cd350267c3954b8"}, -] [package.dependencies] -aws-cdk-lib = ">=2.65.0,<3.0.0" +aws-cdk-lib = "2.79.1" constructs = ">=10.0.0,<11.0.0" -jsii = ">=1.74.0,<2.0.0" +jsii = ">=1.80.0,<2.0.0" publication = ">=0.0.3" typeguard = ">=2.13.3,<2.14.0" [[package]] name = "aws-cdk-aws-redshift-alpha" -version = "2.65.0a0" +version = "2.79.1a0" description = "The CDK Construct Library for AWS::Redshift" category = "main" optional = false python-versions = "~=3.7" -files = [ - {file = "aws-cdk.aws-redshift-alpha-2.65.0a0.tar.gz", hash = "sha256:6e871b76f210cea9a358bb1b87e6134284be655320e8c475fc70de1aa1855f74"}, - {file = "aws_cdk.aws_redshift_alpha-2.65.0a0-py3-none-any.whl", hash = "sha256:4416278ab70af79f19093db2b94e9ea3a46fb107ebd87f5c282c8de09ca84917"}, -] [package.dependencies] -aws-cdk-lib = ">=2.65.0,<3.0.0" +aws-cdk-lib = "2.79.1" constructs = ">=10.0.0,<11.0.0" -jsii = ">=1.74.0,<2.0.0" +jsii = ">=1.80.0,<2.0.0" publication = ">=0.0.3" typeguard = ">=2.13.3,<2.14.0" [[package]] name = "aws-cdk-lib" -version = "2.65.0" +version = "2.79.1" description = "Version 2 of the AWS Cloud Development Kit library" category = "main" optional = false python-versions = "~=3.7" -files = [ - {file = "aws-cdk-lib-2.65.0.tar.gz", hash = "sha256:7d2ea69f827b7f325567109c482ead017f5d2e0fc071d4b352f5db87c24ba010"}, - {file = "aws_cdk_lib-2.65.0-py3-none-any.whl", hash = "sha256:ea12088a72b858a9bf1aaaf0f6de5e6dfe5962b382324239eb7c830327b84f7c"}, -] [package.dependencies] -"aws-cdk.asset-awscli-v1" = ">=2.2.65,<3.0.0" +"aws-cdk.asset-awscli-v1" = ">=2.2.165,<3.0.0" "aws-cdk.asset-kubectl-v20" = ">=2.1.1,<3.0.0" -"aws-cdk.asset-node-proxy-agent-v5" = ">=2.0.54,<3.0.0" +"aws-cdk.asset-node-proxy-agent-v5" = ">=2.0.139,<3.0.0" constructs = ">=10.0.0,<11.0.0" -jsii = ">=1.74.0,<2.0.0" +jsii = ">=1.80.0,<2.0.0" publication = ">=0.0.3" typeguard = ">=2.13.3,<2.14.0" @@ -155,10 +124,6 @@ description = "Composable complex class support for attrs and dataclasses." 
category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "cattrs-22.2.0-py3-none-any.whl", hash = "sha256:bc12b1f0d000b9f9bee83335887d532a1d3e99a833d1bf0882151c97d3e68c21"}, - {file = "cattrs-22.2.0.tar.gz", hash = "sha256:f0eed5642399423cf656e7b66ce92cdc5b963ecafd041d1b24d136fdde7acf6d"}, -] [package.dependencies] attrs = ">=20" @@ -167,51 +132,72 @@ typing_extensions = {version = "*", markers = "python_version < \"3.8\""} [[package]] name = "constructs" -version = "10.1.252" +version = "10.2.24" description = "A programming model for software-defined state" category = "main" optional = false python-versions = "~=3.7" -files = [ - {file = "constructs-10.1.252-py3-none-any.whl", hash = "sha256:cf115fe7729c93ce01b1839e8760be24606c694b96b628230711cbf9b8fa1acd"}, - {file = "constructs-10.1.252.tar.gz", hash = "sha256:1ee8f4a11c4515730dff1de9c40342dd37bd7bd5f5650a500c06a22954e373a2"}, -] [package.dependencies] -jsii = ">=1.75.0,<2.0.0" +jsii = ">=1.81.0,<2.0.0" publication = ">=0.0.3" typeguard = ">=2.13.3,<2.14.0" [[package]] name = "exceptiongroup" -version = "1.1.0" +version = "1.1.1" description = "Backport of PEP 654 (exception groups)" category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "exceptiongroup-1.1.0-py3-none-any.whl", hash = "sha256:327cbda3da756e2de031a3107b81ab7b3770a602c4d16ca618298c526f4bec1e"}, - {file = "exceptiongroup-1.1.0.tar.gz", hash = "sha256:bcb67d800a4497e1b404c2dd44fca47d3b7a5e5433dbab67f96c1a685cdfdf23"}, -] [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "importlib-metadata" +version = "6.6.0" +description = "Read metadata from Python packages" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} +zipp = ">=0.5" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] + +[[package]] +name = "importlib-resources" +version = "5.12.0" +description = "Read resources from Python packages" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + [[package]] name = "jsii" -version = "1.75.0" +version = "1.81.0" description = "Python client for jsii runtime" category = "main" optional = false python-versions = "~=3.7" -files = [ - {file = "jsii-1.75.0-py3-none-any.whl", hash = "sha256:0a36266470e223413f5e3b10ab656bb0a9c8a8902aa180a0c1ebcc93cc15cfce"}, - {file = "jsii-1.75.0.tar.gz", hash = "sha256:87ecc63fdd7e972ae35f25e0804d86ce6f56871f1f4b0dc4e620d3e9fe761912"}, -] [package.dependencies] -attrs = ">=21.2,<23.0" +attrs = ">=21.2,<24.0" cattrs = ">=1.8,<22.3" +importlib-resources = ">=5.2.0" publication = ">=0.0.3" python-dateutil = "*" typeguard = ">=2.13.3,<2.14.0" @@ -224,10 +210,6 @@ 
description = "Publication helps you maintain public-api-friendly modules by pre category = "main" optional = false python-versions = "*" -files = [ - {file = "publication-0.0.3-py2.py3-none-any.whl", hash = "sha256:0248885351febc11d8a1098d5c8e3ab2dabcf3e8c0c96db1e17ecd12b53afbe6"}, - {file = "publication-0.0.3.tar.gz", hash = "sha256:68416a0de76dddcdd2930d1c8ef853a743cc96c82416c4e4d3b5d901c6276dc4"}, -] [[package]] name = "python-dateutil" @@ -236,10 +218,6 @@ description = "Extensions to the standard Python datetime module" category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -files = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, -] [package.dependencies] six = ">=1.5" @@ -251,10 +229,6 @@ description = "Python 2 and 3 compatibility utilities" category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] [[package]] name = "typeguard" @@ -263,10 +237,6 @@ description = "Run-time type checker for Python" category = "main" optional = false python-versions = ">=3.5.3" -files = [ - {file = "typeguard-2.13.3-py3-none-any.whl", hash = "sha256:5e3e3be01e887e7eafae5af63d1f36c849aaa94e3a0112097312aabfa16284f1"}, - {file = "typeguard-2.13.3.tar.gz", hash = "sha256:00edaa8da3a133674796cf5ea87d9f4b4c367d77476e185e80251cc13dfbb8c4"}, -] [package.extras] doc = ["sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] @@ -279,12 +249,102 @@ description = "Backported and Experimental Type Hints for Python 3.7+" category = "main" optional = false python-versions = ">=3.7" -files = [ + +[[package]] +name = "zipp" +version = "3.15.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + +[metadata] +lock-version = "1.1" +python-versions = ">=3.7.1, <4.0" +content-hash = "faec94c58751b5eab5e42e4828792e961730a13773db13621cd835a0f4af418b" + +[metadata.files] +attrs = [ + {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, + {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, +] +aws-cdk-asset-awscli-v1 = [ + {file = "aws-cdk.asset-awscli-v1-2.2.173.tar.gz", hash = "sha256:8fbda70421638a0cbe66386e1977c1deb9562881ab1e46b6145ad66c266e2a38"}, + {file = "aws_cdk.asset_awscli_v1-2.2.173-py3-none-any.whl", hash = "sha256:c6a0590a31284bc9fc586f00765d73526ab99817d3547e25597cd3d18bc2e9f3"}, +] +aws-cdk-asset-kubectl-v20 = [ + {file = "aws-cdk.asset-kubectl-v20-2.1.1.tar.gz", hash = "sha256:9834cdb150c5590aea4e5eba6de2a89b4c60617451181c524810c5a75154565c"}, + {file = 
"aws_cdk.asset_kubectl_v20-2.1.1-py3-none-any.whl", hash = "sha256:a2fad1a5a35a94a465efe60859f91e45dacc33261fb9bbf1cf9bbc6e2f70e9d6"}, +] +aws-cdk-asset-node-proxy-agent-v5 = [ + {file = "aws-cdk.asset-node-proxy-agent-v5-2.0.146.tar.gz", hash = "sha256:9aac36a3f69e9d7d99fc403192be41369ff0d0307a3e4c028a3a70e0108ac63c"}, + {file = "aws_cdk.asset_node_proxy_agent_v5-2.0.146-py3-none-any.whl", hash = "sha256:cec187b3e2db5f90839598511c002bb4fda1d1728b81ebcae58c13c901681583"}, +] +aws-cdk-aws-glue-alpha = [ + {file = "aws-cdk.aws-glue-alpha-2.79.1a0.tar.gz", hash = "sha256:95cd920a3dbae279699691391713e4d8a863088fc8b4cd082dd685b3f8714d11"}, + {file = "aws_cdk.aws_glue_alpha-2.79.1a0-py3-none-any.whl", hash = "sha256:2a0f0baf894d742292b29e040801ae5d0542c91c257ef76a14ae14f903339b0d"}, +] +aws-cdk-aws-neptune-alpha = [ + {file = "aws-cdk.aws-neptune-alpha-2.79.1a0.tar.gz", hash = "sha256:d2956b7659a5722f3a603c29883eb22a39e865845ae61eef3141906269e67a98"}, + {file = "aws_cdk.aws_neptune_alpha-2.79.1a0-py3-none-any.whl", hash = "sha256:1c558328d039df2ddbc661ad374fb84589a22500d2c5eea72bac57b90c58e4a2"}, +] +aws-cdk-aws-redshift-alpha = [ + {file = "aws-cdk.aws-redshift-alpha-2.79.1a0.tar.gz", hash = "sha256:a1c27c9cbdcaf9c2e43388367f60cb6ac00608d6432977ef987d24b380085165"}, + {file = "aws_cdk.aws_redshift_alpha-2.79.1a0-py3-none-any.whl", hash = "sha256:65fa9a04312a326c3bb08a86876b1b731ef4491fcc1f3679dee6a30a651f2ff8"}, +] +aws-cdk-lib = [ + {file = "aws-cdk-lib-2.79.1.tar.gz", hash = "sha256:7d88118827ed42025c7b661547b38c54b89afef2cd54214459df51104a1034c3"}, + {file = "aws_cdk_lib-2.79.1-py3-none-any.whl", hash = "sha256:89f51117cc9c2cb2561605fdd5641ceb705ebe0185276dab799193783a70123b"}, +] +cattrs = [ + {file = "cattrs-22.2.0-py3-none-any.whl", hash = "sha256:bc12b1f0d000b9f9bee83335887d532a1d3e99a833d1bf0882151c97d3e68c21"}, + {file = "cattrs-22.2.0.tar.gz", hash = "sha256:f0eed5642399423cf656e7b66ce92cdc5b963ecafd041d1b24d136fdde7acf6d"}, +] +constructs = [ + {file = "constructs-10.2.24-py3-none-any.whl", hash = "sha256:590d2fd1f616ee027a698e479584997e3b6c206dc2746c26b8e47cceb33296c6"}, + {file = "constructs-10.2.24.tar.gz", hash = "sha256:307abe5330dc81c1120d1876a4556d98b572abe85bb8c2b68bc7b6f18e359913"}, +] +exceptiongroup = [ + {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"}, + {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"}, +] +importlib-metadata = [ + {file = "importlib_metadata-6.6.0-py3-none-any.whl", hash = "sha256:43dd286a2cd8995d5eaef7fee2066340423b818ed3fd70adf0bad5f1fac53fed"}, + {file = "importlib_metadata-6.6.0.tar.gz", hash = "sha256:92501cdf9cc66ebd3e612f1b4f0c0765dfa42f0fa38ffb319b6bd84dd675d705"}, +] +importlib-resources = [ + {file = "importlib_resources-5.12.0-py3-none-any.whl", hash = "sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a"}, + {file = "importlib_resources-5.12.0.tar.gz", hash = "sha256:4be82589bf5c1d7999aedf2a45159d10cb3ca4f19b2271f8792bc8e6da7b22f6"}, +] +jsii = [ + {file = "jsii-1.81.0-py3-none-any.whl", hash = "sha256:6d12cd881053bafbac19d2a28fc616497479739784e017534e4cf128ff977b62"}, + {file = "jsii-1.81.0.tar.gz", hash = "sha256:585f6bedd9b586f48ce058451d24f362ee52936179987dd897a100f5355f228f"}, +] +publication = [ + {file = "publication-0.0.3-py2.py3-none-any.whl", hash = "sha256:0248885351febc11d8a1098d5c8e3ab2dabcf3e8c0c96db1e17ecd12b53afbe6"}, + {file = "publication-0.0.3.tar.gz", 
hash = "sha256:68416a0de76dddcdd2930d1c8ef853a743cc96c82416c4e4d3b5d901c6276dc4"}, +] +python-dateutil = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] +six = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] +typeguard = [ + {file = "typeguard-2.13.3-py3-none-any.whl", hash = "sha256:5e3e3be01e887e7eafae5af63d1f36c849aaa94e3a0112097312aabfa16284f1"}, + {file = "typeguard-2.13.3.tar.gz", hash = "sha256:00edaa8da3a133674796cf5ea87d9f4b4c367d77476e185e80251cc13dfbb8c4"}, +] +typing-extensions = [ {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, ] - -[metadata] -lock-version = "2.0" -python-versions = ">=3.7.1, <3.11" -content-hash = "cf03dc1ae6da1111d662ceaaa808517e2329a591443238c2d66174493ef36d77" +zipp = [ + {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"}, + {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"}, +] diff --git a/test_infra/pyproject.toml b/test_infra/pyproject.toml index a7529f13d..4f6a70c06 100644 --- a/test_infra/pyproject.toml +++ b/test_infra/pyproject.toml @@ -6,7 +6,7 @@ authors = ["Amazon Web Services"] license = "Apache License 2.0" [tool.poetry.dependencies] -python = ">=3.7.1, <3.11" +python = ">=3.7.1, <4.0" "aws-cdk-lib" = "^2.64.0" "constructs" = ">=10.0.0,<11.0.0" "aws-cdk.aws-glue-alpha" = "^2.64.0a0" From 7d8bed011a0edb4706510044c5bbf839303f8528 Mon Sep 17 00:00:00 2001 From: Anton Kukushkin Date: Mon, 15 May 2023 15:24:25 +0100 Subject: [PATCH 9/9] [skip ci] Workaround Modin S3 prefix issue --- tests/load/test_s3_modin.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/load/test_s3_modin.py b/tests/load/test_s3_modin.py index c8941bfb9..09baacc7d 100644 --- a/tests/load/test_s3_modin.py +++ b/tests/load/test_s3_modin.py @@ -57,7 +57,7 @@ def test_modin_s3_write_parquet_simple( df_s: pd.DataFrame, path: str, benchmark_time: float, request: pytest.FixtureRequest ) -> None: with ExecutionTimer(request, data_paths=path) as timer: - df_s.to_parquet(path) + df_s.to_parquet(path[:-1]) # path[:-1] due to Modin not properly handling S3 prefixes assert timer.elapsed_time < benchmark_time @@ -72,7 +72,9 @@ def test_modin_s3_write_parquet_dataset( request: pytest.FixtureRequest, ) -> None: with ExecutionTimer(request, data_paths=path) as timer: - df_s.to_parquet(path, partition_cols=partition_cols) + df_s.to_parquet( + path[:-1], partition_cols=partition_cols + ) # path[:-1] due to Modin not properly handling S3 prefixes assert timer.elapsed_time < benchmark_time