diff --git a/awswrangler/distributed/ray/datasources/pandas_file_based_datasource.py b/awswrangler/distributed/ray/datasources/pandas_file_based_datasource.py index 979b9aa51..a6ebd81cb 100644 --- a/awswrangler/distributed/ray/datasources/pandas_file_based_datasource.py +++ b/awswrangler/distributed/ray/datasources/pandas_file_based_datasource.py @@ -7,7 +7,7 @@ import pyarrow import ray from ray.data._internal.delegating_block_builder import DelegatingBlockBuilder -from ray.data.block import Block, BlockAccessor +from ray.data.block import Block, BlockAccessor, BlockMetadata from ray.data.datasource.datasource import WriteResult from ray.data.datasource.file_based_datasource import ( BlockWritePathProvider, @@ -64,11 +64,24 @@ def __init__(self) -> None: def _read_file(self, f: pyarrow.NativeFile, path: str, **reader_args: Any) -> pd.DataFrame: raise NotImplementedError() - def do_write( + def do_write( # pylint: disable=arguments-differ self, blocks: List[ObjectRef[pd.DataFrame]], - *args: Any, - **kwargs: Any, + metadata: List[BlockMetadata], + path: str, + dataset_uuid: str, + filesystem: Optional[pyarrow.fs.FileSystem] = None, + try_create_dir: bool = True, + open_stream_args: Optional[Dict[str, Any]] = None, + block_path_provider: BlockWritePathProvider = DefaultBlockWritePathProvider(), + write_args_fn: Callable[[], Dict[str, Any]] = lambda: {}, + _block_udf: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None, + ray_remote_args: Optional[Dict[str, Any]] = None, + s3_additional_kwargs: Optional[Dict[str, str]] = None, + pandas_kwargs: Optional[Dict[str, Any]] = None, + compression: Optional[str] = None, + mode: str = "wb", + **write_args: Any, ) -> List[ObjectRef[WriteResult]]: """Create and return write tasks for a file-based datasource. @@ -77,21 +90,53 @@ def do_write( plan allowing query optimisation ("fuse" with other operations). The change is not backward-compatible with earlier versions still attempting to call do_write(). """ - write_tasks = [] - path: str = kwargs.pop("path") - dataset_uuid: str = kwargs.pop("dataset_uuid") - ray_remote_args: Dict[str, Any] = kwargs.pop("ray_remote_args") or {} + _write_block_to_file = self._write_block + + if ray_remote_args is None: + ray_remote_args = {} + + if pandas_kwargs is None: + pandas_kwargs = {} - _write = ray_remote(**ray_remote_args)(self.write) + if not compression: + compression = pandas_kwargs.get("compression") + + def write_block(write_path: str, block: pd.DataFrame) -> str: + if _block_udf is not None: + block = _block_udf(block) + + with open_s3_object( + path=write_path, + mode=mode, + use_threads=False, + s3_additional_kwargs=s3_additional_kwargs, + encoding=write_args.get("encoding"), + newline=write_args.get("newline"), + ) as f: + _write_block_to_file( + f, + BlockAccessor.for_block(block), + pandas_kwargs=pandas_kwargs, + compression=compression, + **write_args, + ) + return write_path + + write_block_fn = ray_remote(**ray_remote_args)(write_block) + + file_suffix = self._get_file_suffix(self._FILE_EXTENSION, compression) + write_tasks = [] for block_idx, block in enumerate(blocks): - write_task = _write( - [block], - TaskContext(task_idx=block_idx), + write_path = block_path_provider( path, - dataset_uuid, - **kwargs, + filesystem=filesystem, + dataset_uuid=dataset_uuid, + block=block, + block_index=block_idx, + file_format=file_suffix, ) + write_task = write_block_fn(write_path, block) write_tasks.append(write_task) return write_tasks diff --git a/test_infra/poetry.lock b/test_infra/poetry.lock index 73b57c3ba..76304d943 100644 --- a/test_infra/poetry.lock +++ b/test_infra/poetry.lock @@ -1,38 +1,31 @@ -# This file is automatically @generated by Poetry and should not be changed by hand. - [[package]] name = "attrs" -version = "22.2.0" +version = "23.1.0" description = "Classes Without Boilerplate" category = "main" optional = false -python-versions = ">=3.6" -files = [ - {file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"}, - {file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"}, -] +python-versions = ">=3.7" + +[package.dependencies] +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} [package.extras] -cov = ["attrs[tests]", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"] -dev = ["attrs[docs,tests]"] -docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope.interface"] -tests = ["attrs[tests-no-zope]", "zope.interface"] -tests-no-zope = ["cloudpickle", "cloudpickle", "hypothesis", "hypothesis", "mypy (>=0.971,<0.990)", "mypy (>=0.971,<0.990)", "pympler", "pympler", "pytest (>=4.3.0)", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-mypy-plugins", "pytest-xdist[psutil]", "pytest-xdist[psutil]"] +cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] +dev = ["attrs[docs,tests]", "pre-commit"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] +tests = ["attrs[tests-no-zope]", "zope-interface"] +tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] [[package]] name = "aws-cdk-asset-awscli-v1" -version = "2.2.69" +version = "2.2.173" description = "A library that contains the AWS CLI for use in Lambda Layers" category = "main" optional = false python-versions = "~=3.7" -files = [ - {file = "aws-cdk.asset-awscli-v1-2.2.69.tar.gz", hash = "sha256:a076a29075ce863d2a3cae2036f31d2317e5dc52f97780d828bf67d37a3fb523"}, - {file = "aws_cdk.asset_awscli_v1-2.2.69-py3-none-any.whl", hash = "sha256:e53d352bc50c566bbbbcbb85b17f98432af853eff16559570c6d5d6be58bb44e"}, -] [package.dependencies] -jsii = ">=1.75.0,<2.0.0" +jsii = ">=1.81.0,<2.0.0" publication = ">=0.0.3" typeguard = ">=2.13.3,<2.14.0" @@ -43,10 +36,6 @@ description = "A library that contains kubectl for use in Lambda Layers" category = "main" optional = false python-versions = "~=3.7" -files = [ - {file = "aws-cdk.asset-kubectl-v20-2.1.1.tar.gz", hash = "sha256:9834cdb150c5590aea4e5eba6de2a89b4c60617451181c524810c5a75154565c"}, - {file = "aws_cdk.asset_kubectl_v20-2.1.1-py3-none-any.whl", hash = "sha256:a2fad1a5a35a94a465efe60859f91e45dacc33261fb9bbf1cf9bbc6e2f70e9d6"}, -] [package.dependencies] jsii = ">=1.70.0,<2.0.0" @@ -55,96 +44,76 @@ typeguard = ">=2.13.3,<2.14.0" [[package]] name = "aws-cdk-asset-node-proxy-agent-v5" -version = "2.0.58" +version = "2.0.146" description = "@aws-cdk/asset-node-proxy-agent-v5" category = "main" optional = false python-versions = "~=3.7" -files = [ - {file = "aws-cdk.asset-node-proxy-agent-v5-2.0.58.tar.gz", hash = "sha256:2b045e6d1ec8f3290ab32b839ed26fddd691dfa2cb8868fc2d641a3c9b768cad"}, - {file = "aws_cdk.asset_node_proxy_agent_v5-2.0.58-py3-none-any.whl", hash = "sha256:e9b913a613342f277505eb32ac268d699ebf66624940509b2e33453392f6f90a"}, -] [package.dependencies] -jsii = ">=1.75.0,<2.0.0" +jsii = ">=1.81.0,<2.0.0" publication = ">=0.0.3" typeguard = ">=2.13.3,<2.14.0" [[package]] name = "aws-cdk-aws-glue-alpha" -version = "2.65.0a0" +version = "2.79.1a0" description = "The CDK Construct Library for AWS::Glue" category = "main" optional = false python-versions = "~=3.7" -files = [ - {file = "aws-cdk.aws-glue-alpha-2.65.0a0.tar.gz", hash = "sha256:c36f4bc149115bd2798b52df4fe1106f5d2b829f121ece3a913d48ed8953b673"}, - {file = "aws_cdk.aws_glue_alpha-2.65.0a0-py3-none-any.whl", hash = "sha256:5df1bd87e0c4fd3d0735b3f2d6f5cfbba5b6e9d2e3821ba661f0682d519647af"}, -] [package.dependencies] -aws-cdk-lib = ">=2.65.0,<3.0.0" +aws-cdk-lib = "2.79.1" constructs = ">=10.0.0,<11.0.0" -jsii = ">=1.74.0,<2.0.0" +jsii = ">=1.80.0,<2.0.0" publication = ">=0.0.3" typeguard = ">=2.13.3,<2.14.0" [[package]] name = "aws-cdk-aws-neptune-alpha" -version = "2.65.0a0" +version = "2.79.1a0" description = "The CDK Construct Library for AWS::Neptune" category = "main" optional = false python-versions = "~=3.7" -files = [ - {file = "aws-cdk.aws-neptune-alpha-2.65.0a0.tar.gz", hash = "sha256:1ddd153c0304392e3aa816394de9a517e33b0e1460e10e7f031459ce4c864b85"}, - {file = "aws_cdk.aws_neptune_alpha-2.65.0a0-py3-none-any.whl", hash = "sha256:6f383d13821d01315c9f8728ac016c863d2cba837edea9216cd350267c3954b8"}, -] [package.dependencies] -aws-cdk-lib = ">=2.65.0,<3.0.0" +aws-cdk-lib = "2.79.1" constructs = ">=10.0.0,<11.0.0" -jsii = ">=1.74.0,<2.0.0" +jsii = ">=1.80.0,<2.0.0" publication = ">=0.0.3" typeguard = ">=2.13.3,<2.14.0" [[package]] name = "aws-cdk-aws-redshift-alpha" -version = "2.65.0a0" +version = "2.79.1a0" description = "The CDK Construct Library for AWS::Redshift" category = "main" optional = false python-versions = "~=3.7" -files = [ - {file = "aws-cdk.aws-redshift-alpha-2.65.0a0.tar.gz", hash = "sha256:6e871b76f210cea9a358bb1b87e6134284be655320e8c475fc70de1aa1855f74"}, - {file = "aws_cdk.aws_redshift_alpha-2.65.0a0-py3-none-any.whl", hash = "sha256:4416278ab70af79f19093db2b94e9ea3a46fb107ebd87f5c282c8de09ca84917"}, -] [package.dependencies] -aws-cdk-lib = ">=2.65.0,<3.0.0" +aws-cdk-lib = "2.79.1" constructs = ">=10.0.0,<11.0.0" -jsii = ">=1.74.0,<2.0.0" +jsii = ">=1.80.0,<2.0.0" publication = ">=0.0.3" typeguard = ">=2.13.3,<2.14.0" [[package]] name = "aws-cdk-lib" -version = "2.65.0" +version = "2.79.1" description = "Version 2 of the AWS Cloud Development Kit library" category = "main" optional = false python-versions = "~=3.7" -files = [ - {file = "aws-cdk-lib-2.65.0.tar.gz", hash = "sha256:7d2ea69f827b7f325567109c482ead017f5d2e0fc071d4b352f5db87c24ba010"}, - {file = "aws_cdk_lib-2.65.0-py3-none-any.whl", hash = "sha256:ea12088a72b858a9bf1aaaf0f6de5e6dfe5962b382324239eb7c830327b84f7c"}, -] [package.dependencies] -"aws-cdk.asset-awscli-v1" = ">=2.2.65,<3.0.0" +"aws-cdk.asset-awscli-v1" = ">=2.2.165,<3.0.0" "aws-cdk.asset-kubectl-v20" = ">=2.1.1,<3.0.0" -"aws-cdk.asset-node-proxy-agent-v5" = ">=2.0.54,<3.0.0" +"aws-cdk.asset-node-proxy-agent-v5" = ">=2.0.139,<3.0.0" constructs = ">=10.0.0,<11.0.0" -jsii = ">=1.74.0,<2.0.0" +jsii = ">=1.80.0,<2.0.0" publication = ">=0.0.3" typeguard = ">=2.13.3,<2.14.0" @@ -155,10 +124,6 @@ description = "Composable complex class support for attrs and dataclasses." category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "cattrs-22.2.0-py3-none-any.whl", hash = "sha256:bc12b1f0d000b9f9bee83335887d532a1d3e99a833d1bf0882151c97d3e68c21"}, - {file = "cattrs-22.2.0.tar.gz", hash = "sha256:f0eed5642399423cf656e7b66ce92cdc5b963ecafd041d1b24d136fdde7acf6d"}, -] [package.dependencies] attrs = ">=20" @@ -167,51 +132,72 @@ typing_extensions = {version = "*", markers = "python_version < \"3.8\""} [[package]] name = "constructs" -version = "10.1.252" +version = "10.2.24" description = "A programming model for software-defined state" category = "main" optional = false python-versions = "~=3.7" -files = [ - {file = "constructs-10.1.252-py3-none-any.whl", hash = "sha256:cf115fe7729c93ce01b1839e8760be24606c694b96b628230711cbf9b8fa1acd"}, - {file = "constructs-10.1.252.tar.gz", hash = "sha256:1ee8f4a11c4515730dff1de9c40342dd37bd7bd5f5650a500c06a22954e373a2"}, -] [package.dependencies] -jsii = ">=1.75.0,<2.0.0" +jsii = ">=1.81.0,<2.0.0" publication = ">=0.0.3" typeguard = ">=2.13.3,<2.14.0" [[package]] name = "exceptiongroup" -version = "1.1.0" +version = "1.1.1" description = "Backport of PEP 654 (exception groups)" category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "exceptiongroup-1.1.0-py3-none-any.whl", hash = "sha256:327cbda3da756e2de031a3107b81ab7b3770a602c4d16ca618298c526f4bec1e"}, - {file = "exceptiongroup-1.1.0.tar.gz", hash = "sha256:bcb67d800a4497e1b404c2dd44fca47d3b7a5e5433dbab67f96c1a685cdfdf23"}, -] [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "importlib-metadata" +version = "6.6.0" +description = "Read metadata from Python packages" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} +zipp = ">=0.5" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] + +[[package]] +name = "importlib-resources" +version = "5.12.0" +description = "Read resources from Python packages" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + [[package]] name = "jsii" -version = "1.75.0" +version = "1.81.0" description = "Python client for jsii runtime" category = "main" optional = false python-versions = "~=3.7" -files = [ - {file = "jsii-1.75.0-py3-none-any.whl", hash = "sha256:0a36266470e223413f5e3b10ab656bb0a9c8a8902aa180a0c1ebcc93cc15cfce"}, - {file = "jsii-1.75.0.tar.gz", hash = "sha256:87ecc63fdd7e972ae35f25e0804d86ce6f56871f1f4b0dc4e620d3e9fe761912"}, -] [package.dependencies] -attrs = ">=21.2,<23.0" +attrs = ">=21.2,<24.0" cattrs = ">=1.8,<22.3" +importlib-resources = ">=5.2.0" publication = ">=0.0.3" python-dateutil = "*" typeguard = ">=2.13.3,<2.14.0" @@ -224,10 +210,6 @@ description = "Publication helps you maintain public-api-friendly modules by pre category = "main" optional = false python-versions = "*" -files = [ - {file = "publication-0.0.3-py2.py3-none-any.whl", hash = "sha256:0248885351febc11d8a1098d5c8e3ab2dabcf3e8c0c96db1e17ecd12b53afbe6"}, - {file = "publication-0.0.3.tar.gz", hash = "sha256:68416a0de76dddcdd2930d1c8ef853a743cc96c82416c4e4d3b5d901c6276dc4"}, -] [[package]] name = "python-dateutil" @@ -236,10 +218,6 @@ description = "Extensions to the standard Python datetime module" category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -files = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, -] [package.dependencies] six = ">=1.5" @@ -251,10 +229,6 @@ description = "Python 2 and 3 compatibility utilities" category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] [[package]] name = "typeguard" @@ -263,10 +237,6 @@ description = "Run-time type checker for Python" category = "main" optional = false python-versions = ">=3.5.3" -files = [ - {file = "typeguard-2.13.3-py3-none-any.whl", hash = "sha256:5e3e3be01e887e7eafae5af63d1f36c849aaa94e3a0112097312aabfa16284f1"}, - {file = "typeguard-2.13.3.tar.gz", hash = "sha256:00edaa8da3a133674796cf5ea87d9f4b4c367d77476e185e80251cc13dfbb8c4"}, -] [package.extras] doc = ["sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] @@ -279,12 +249,102 @@ description = "Backported and Experimental Type Hints for Python 3.7+" category = "main" optional = false python-versions = ">=3.7" -files = [ + +[[package]] +name = "zipp" +version = "3.15.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + +[metadata] +lock-version = "1.1" +python-versions = ">=3.7.1, <4.0" +content-hash = "faec94c58751b5eab5e42e4828792e961730a13773db13621cd835a0f4af418b" + +[metadata.files] +attrs = [ + {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, + {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, +] +aws-cdk-asset-awscli-v1 = [ + {file = "aws-cdk.asset-awscli-v1-2.2.173.tar.gz", hash = "sha256:8fbda70421638a0cbe66386e1977c1deb9562881ab1e46b6145ad66c266e2a38"}, + {file = "aws_cdk.asset_awscli_v1-2.2.173-py3-none-any.whl", hash = "sha256:c6a0590a31284bc9fc586f00765d73526ab99817d3547e25597cd3d18bc2e9f3"}, +] +aws-cdk-asset-kubectl-v20 = [ + {file = "aws-cdk.asset-kubectl-v20-2.1.1.tar.gz", hash = "sha256:9834cdb150c5590aea4e5eba6de2a89b4c60617451181c524810c5a75154565c"}, + {file = "aws_cdk.asset_kubectl_v20-2.1.1-py3-none-any.whl", hash = "sha256:a2fad1a5a35a94a465efe60859f91e45dacc33261fb9bbf1cf9bbc6e2f70e9d6"}, +] +aws-cdk-asset-node-proxy-agent-v5 = [ + {file = "aws-cdk.asset-node-proxy-agent-v5-2.0.146.tar.gz", hash = "sha256:9aac36a3f69e9d7d99fc403192be41369ff0d0307a3e4c028a3a70e0108ac63c"}, + {file = "aws_cdk.asset_node_proxy_agent_v5-2.0.146-py3-none-any.whl", hash = "sha256:cec187b3e2db5f90839598511c002bb4fda1d1728b81ebcae58c13c901681583"}, +] +aws-cdk-aws-glue-alpha = [ + {file = "aws-cdk.aws-glue-alpha-2.79.1a0.tar.gz", hash = "sha256:95cd920a3dbae279699691391713e4d8a863088fc8b4cd082dd685b3f8714d11"}, + {file = "aws_cdk.aws_glue_alpha-2.79.1a0-py3-none-any.whl", hash = "sha256:2a0f0baf894d742292b29e040801ae5d0542c91c257ef76a14ae14f903339b0d"}, +] +aws-cdk-aws-neptune-alpha = [ + {file = "aws-cdk.aws-neptune-alpha-2.79.1a0.tar.gz", hash = "sha256:d2956b7659a5722f3a603c29883eb22a39e865845ae61eef3141906269e67a98"}, + {file = "aws_cdk.aws_neptune_alpha-2.79.1a0-py3-none-any.whl", hash = "sha256:1c558328d039df2ddbc661ad374fb84589a22500d2c5eea72bac57b90c58e4a2"}, +] +aws-cdk-aws-redshift-alpha = [ + {file = "aws-cdk.aws-redshift-alpha-2.79.1a0.tar.gz", hash = "sha256:a1c27c9cbdcaf9c2e43388367f60cb6ac00608d6432977ef987d24b380085165"}, + {file = "aws_cdk.aws_redshift_alpha-2.79.1a0-py3-none-any.whl", hash = "sha256:65fa9a04312a326c3bb08a86876b1b731ef4491fcc1f3679dee6a30a651f2ff8"}, +] +aws-cdk-lib = [ + {file = "aws-cdk-lib-2.79.1.tar.gz", hash = "sha256:7d88118827ed42025c7b661547b38c54b89afef2cd54214459df51104a1034c3"}, + {file = "aws_cdk_lib-2.79.1-py3-none-any.whl", hash = "sha256:89f51117cc9c2cb2561605fdd5641ceb705ebe0185276dab799193783a70123b"}, +] +cattrs = [ + {file = "cattrs-22.2.0-py3-none-any.whl", hash = "sha256:bc12b1f0d000b9f9bee83335887d532a1d3e99a833d1bf0882151c97d3e68c21"}, + {file = "cattrs-22.2.0.tar.gz", hash = "sha256:f0eed5642399423cf656e7b66ce92cdc5b963ecafd041d1b24d136fdde7acf6d"}, +] +constructs = [ + {file = "constructs-10.2.24-py3-none-any.whl", hash = "sha256:590d2fd1f616ee027a698e479584997e3b6c206dc2746c26b8e47cceb33296c6"}, + {file = "constructs-10.2.24.tar.gz", hash = "sha256:307abe5330dc81c1120d1876a4556d98b572abe85bb8c2b68bc7b6f18e359913"}, +] +exceptiongroup = [ + {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"}, + {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"}, +] +importlib-metadata = [ + {file = "importlib_metadata-6.6.0-py3-none-any.whl", hash = "sha256:43dd286a2cd8995d5eaef7fee2066340423b818ed3fd70adf0bad5f1fac53fed"}, + {file = "importlib_metadata-6.6.0.tar.gz", hash = "sha256:92501cdf9cc66ebd3e612f1b4f0c0765dfa42f0fa38ffb319b6bd84dd675d705"}, +] +importlib-resources = [ + {file = "importlib_resources-5.12.0-py3-none-any.whl", hash = "sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a"}, + {file = "importlib_resources-5.12.0.tar.gz", hash = "sha256:4be82589bf5c1d7999aedf2a45159d10cb3ca4f19b2271f8792bc8e6da7b22f6"}, +] +jsii = [ + {file = "jsii-1.81.0-py3-none-any.whl", hash = "sha256:6d12cd881053bafbac19d2a28fc616497479739784e017534e4cf128ff977b62"}, + {file = "jsii-1.81.0.tar.gz", hash = "sha256:585f6bedd9b586f48ce058451d24f362ee52936179987dd897a100f5355f228f"}, +] +publication = [ + {file = "publication-0.0.3-py2.py3-none-any.whl", hash = "sha256:0248885351febc11d8a1098d5c8e3ab2dabcf3e8c0c96db1e17ecd12b53afbe6"}, + {file = "publication-0.0.3.tar.gz", hash = "sha256:68416a0de76dddcdd2930d1c8ef853a743cc96c82416c4e4d3b5d901c6276dc4"}, +] +python-dateutil = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] +six = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] +typeguard = [ + {file = "typeguard-2.13.3-py3-none-any.whl", hash = "sha256:5e3e3be01e887e7eafae5af63d1f36c849aaa94e3a0112097312aabfa16284f1"}, + {file = "typeguard-2.13.3.tar.gz", hash = "sha256:00edaa8da3a133674796cf5ea87d9f4b4c367d77476e185e80251cc13dfbb8c4"}, +] +typing-extensions = [ {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, ] - -[metadata] -lock-version = "2.0" -python-versions = ">=3.7.1, <3.11" -content-hash = "cf03dc1ae6da1111d662ceaaa808517e2329a591443238c2d66174493ef36d77" +zipp = [ + {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"}, + {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"}, +] diff --git a/test_infra/pyproject.toml b/test_infra/pyproject.toml index a7529f13d..4f6a70c06 100644 --- a/test_infra/pyproject.toml +++ b/test_infra/pyproject.toml @@ -6,7 +6,7 @@ authors = ["Amazon Web Services"] license = "Apache License 2.0" [tool.poetry.dependencies] -python = ">=3.7.1, <3.11" +python = ">=3.7.1, <4.0" "aws-cdk-lib" = "^2.64.0" "constructs" = ">=10.0.0,<11.0.0" "aws-cdk.aws-glue-alpha" = "^2.64.0a0" diff --git a/tests/load/test_s3_modin.py b/tests/load/test_s3_modin.py index c8941bfb9..09baacc7d 100644 --- a/tests/load/test_s3_modin.py +++ b/tests/load/test_s3_modin.py @@ -57,7 +57,7 @@ def test_modin_s3_write_parquet_simple( df_s: pd.DataFrame, path: str, benchmark_time: float, request: pytest.FixtureRequest ) -> None: with ExecutionTimer(request, data_paths=path) as timer: - df_s.to_parquet(path) + df_s.to_parquet(path[:-1]) # path[:-1] due to Modin not properly handling S3 prefixes assert timer.elapsed_time < benchmark_time @@ -72,7 +72,9 @@ def test_modin_s3_write_parquet_dataset( request: pytest.FixtureRequest, ) -> None: with ExecutionTimer(request, data_paths=path) as timer: - df_s.to_parquet(path, partition_cols=partition_cols) + df_s.to_parquet( + path[:-1], partition_cols=partition_cols + ) # path[:-1] due to Modin not properly handling S3 prefixes assert timer.elapsed_time < benchmark_time