From bba0e77c83929a6f4c6ffdbbc0b48eac69854b5f Mon Sep 17 00:00:00 2001 From: Brannon Imamura Date: Tue, 5 Apr 2022 14:19:44 -0500 Subject: [PATCH 1/4] pass sql_copy_extra_params to _copy for redshift.copy() --- awswrangler/redshift.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/awswrangler/redshift.py b/awswrangler/redshift.py index c1e70251c..f6f2af0d8 100644 --- a/awswrangler/redshift.py +++ b/awswrangler/redshift.py @@ -1411,6 +1411,7 @@ def copy( # pylint: disable=too-many-arguments boto3_session: Optional[boto3.Session] = None, s3_additional_kwargs: Optional[Dict[str, str]] = None, max_rows_by_file: Optional[int] = 10_000_000, + sql_copy_extra_params: Optional[List[str]] = None, ) -> None: """Load Pandas DataFrame as a Table on Amazon Redshift using parquet files on S3 as stage. @@ -1573,6 +1574,7 @@ def copy( # pylint: disable=too-many-arguments lock=lock, boto3_session=session, s3_additional_kwargs=s3_additional_kwargs, + sql_copy_extra_params=sql_copy_extra_params, ) finally: if keep_files is False: From 545aaba822d24741a3887a00f928b950a2f0891c Mon Sep 17 00:00:00 2001 From: Brannon Imamura Date: Tue, 5 Apr 2022 14:27:56 -0500 Subject: [PATCH 2/4] bump vers --- .bumpversion.cfg | 2 +- CONTRIBUTING_COMMON_ERRORS.md | 6 +- README.md | 72 +++++++++---------- awswrangler/__metadata__.py | 2 +- awswrangler/athena/_read.py | 16 ++--- awswrangler/s3/_read_parquet.py | 2 +- awswrangler/s3/_read_text.py | 6 +- awswrangler/s3/_write_parquet.py | 6 +- awswrangler/s3/_write_text.py | 12 ++-- docs/source/install.rst | 10 +-- docs/source/what.rst | 2 +- pyproject.toml | 2 +- test_infra/pyproject.toml | 2 +- tests/test_metadata.py | 2 +- tutorials/001 - Introduction.ipynb | 20 +++--- ...shift, MySQL, PostgreSQL, SQL Server.ipynb | 24 +++---- tutorials/014 - Schema Evolution.ipynb | 6 +- tutorials/021 - Global Configurations.ipynb | 2 +- ...22 - Writing Partitions Concurrently.ipynb | 2 +- .../023 - Flexible Partitions Filter.ipynb | 2 +- 20 files changed, 99 insertions(+), 99 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 380e4da76..1c951af81 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 2.15.0 +current_version = 2.15.1 commit = False tag = False tag_name = {new_version} diff --git a/CONTRIBUTING_COMMON_ERRORS.md b/CONTRIBUTING_COMMON_ERRORS.md index 2f69565de..0626df86c 100644 --- a/CONTRIBUTING_COMMON_ERRORS.md +++ b/CONTRIBUTING_COMMON_ERRORS.md @@ -13,9 +13,9 @@ Requirement already satisfied: pbr!=2.1.0,>=2.0.0 in ./.venv/lib/python3.7/site- Using legacy 'setup.py install' for python-Levenshtein, since package 'wheel' is not installed. Installing collected packages: awswrangler, python-Levenshtein Attempting uninstall: awswrangler - Found existing installation: awswrangler 2.15.0 - Uninstalling awswrangler-2.15.0: - Successfully uninstalled awswrangler-2.15.0 + Found existing installation: awswrangler 2.15.1 + Uninstalling awswrangler-2.15.1: + Successfully uninstalled awswrangler-2.15.1 Running setup.py develop for awswrangler Running setup.py install for python-Levenshtein ... 
error ERROR: Command errored out with exit status 1: diff --git a/README.md b/README.md index 0020accad..446dcac05 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Easy integration with Athena, Glue, Redshift, Timestream, OpenSearch, Neptune, Q > An [AWS Professional Service](https://aws.amazon.com/professional-services/) open source initiative | aws-proserve-opensource@amazon.com -[![Release](https://img.shields.io/badge/release-2.15.0-brightgreen.svg)](https://pypi.org/project/awswrangler/) +[![Release](https://img.shields.io/badge/release-2.15.1-brightgreen.svg)](https://pypi.org/project/awswrangler/) [![Python Version](https://img.shields.io/badge/python-3.7%20%7C%203.7%20%7C%203.8%20%7C%203.10-brightgreen.svg)](https://anaconda.org/conda-forge/awswrangler) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) @@ -23,7 +23,7 @@ Easy integration with Athena, Glue, Redshift, Timestream, OpenSearch, Neptune, Q | **[PyPi](https://pypi.org/project/awswrangler/)** | [![PyPI Downloads](https://pepy.tech/badge/awswrangler)](https://pypi.org/project/awswrangler/) | `pip install awswrangler` | | **[Conda](https://anaconda.org/conda-forge/awswrangler)** | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/awswrangler.svg)](https://anaconda.org/conda-forge/awswrangler) | `conda install -c conda-forge awswrangler` | -> ⚠️ **For platforms without PyArrow 3 support (e.g. [EMR](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#emr-cluster), [Glue PySpark Job](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#aws-glue-pyspark-jobs), MWAA):**
+> ⚠️ **For platforms without PyArrow 3 support (e.g. [EMR](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#emr-cluster), [Glue PySpark Job](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#aws-glue-pyspark-jobs), MWAA):**
➡️ `pip install pyarrow==2 awswrangler` Powered By [](https://arrow.apache.org/powered_by/) @@ -42,7 +42,7 @@ Powered By [](http Installation command: `pip install awswrangler` -> ⚠️ **For platforms without PyArrow 3 support (e.g. [EMR](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#emr-cluster), [Glue PySpark Job](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#aws-glue-pyspark-jobs), MWAA):**
+> ⚠️ **For platforms without PyArrow 3 support (e.g. [EMR](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#emr-cluster), [Glue PySpark Job](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#aws-glue-pyspark-jobs), MWAA):**
➡️`pip install pyarrow==2 awswrangler` ```py3 @@ -96,17 +96,17 @@ FROM "sampleDB"."sampleTable" ORDER BY time DESC LIMIT 3 ## [Read The Docs](https://aws-data-wrangler.readthedocs.io/) -- [**What is AWS Data Wrangler?**](https://aws-data-wrangler.readthedocs.io/en/2.15.0/what.html) -- [**Install**](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html) - - [PyPi (pip)](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#pypi-pip) - - [Conda](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#conda) - - [AWS Lambda Layer](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#aws-lambda-layer) - - [AWS Glue Python Shell Jobs](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#aws-glue-python-shell-jobs) - - [AWS Glue PySpark Jobs](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#aws-glue-pyspark-jobs) - - [Amazon SageMaker Notebook](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#amazon-sagemaker-notebook) - - [Amazon SageMaker Notebook Lifecycle](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#amazon-sagemaker-notebook-lifecycle) - - [EMR](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#emr) - - [From source](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#from-source) +- [**What is AWS Data Wrangler?**](https://aws-data-wrangler.readthedocs.io/en/2.15.1/what.html) +- [**Install**](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html) + - [PyPi (pip)](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#pypi-pip) + - [Conda](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#conda) + - [AWS Lambda Layer](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#aws-lambda-layer) + - [AWS Glue Python Shell Jobs](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#aws-glue-python-shell-jobs) + - [AWS Glue PySpark Jobs](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#aws-glue-pyspark-jobs) + - [Amazon SageMaker Notebook](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#amazon-sagemaker-notebook) + - [Amazon SageMaker Notebook Lifecycle](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#amazon-sagemaker-notebook-lifecycle) + - [EMR](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#emr) + - [From source](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#from-source) - [**Tutorials**](https://github.com/awslabs/aws-data-wrangler/tree/main/tutorials) - [001 - Introduction](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/001%20-%20Introduction.ipynb) - [002 - Sessions](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/002%20-%20Sessions.ipynb) @@ -141,28 +141,28 @@ FROM "sampleDB"."sampleTable" ORDER BY time DESC LIMIT 3 - [031 - OpenSearch](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/031%20-%20OpenSearch.ipynb) - [032 - Lake Formation Governed Tables](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/032%20-%20Lake%20Formation%20Governed%20Tables.ipynb) - [033 - Amazon Neptune](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/033%20-%20Amazon%20Neptune.ipynb) -- [**API Reference**](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html) - - [Amazon S3](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-s3) - - [AWS Glue Catalog](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#aws-glue-catalog) - - [Amazon 
Athena](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-athena) - - [AWS Lake Formation](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#aws-lake-formation) - - [Amazon Redshift](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-redshift) - - [PostgreSQL](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#postgresql) - - [MySQL](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#mysql) - - [SQL Server](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#sqlserver) - - [Data API Redshift](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#data-api-redshift) - - [Data API RDS](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#data-api-rds) - - [OpenSearch](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#opensearch) - - [Amazon Neptune](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-neptune) - - [DynamoDB](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#dynamodb) - - [Amazon Timestream](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-timestream) - - [Amazon EMR](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-emr) - - [Amazon CloudWatch Logs](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-cloudwatch-logs) - - [Amazon Chime](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-chime) - - [Amazon QuickSight](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-quicksight) - - [AWS STS](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#aws-sts) - - [AWS Secrets Manager](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#aws-secrets-manager) - - [Global Configurations](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#global-configurations) +- [**API Reference**](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html) + - [Amazon S3](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-s3) + - [AWS Glue Catalog](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#aws-glue-catalog) + - [Amazon Athena](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-athena) + - [AWS Lake Formation](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#aws-lake-formation) + - [Amazon Redshift](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-redshift) + - [PostgreSQL](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#postgresql) + - [MySQL](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#mysql) + - [SQL Server](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#sqlserver) + - [Data API Redshift](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#data-api-redshift) + - [Data API RDS](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#data-api-rds) + - [OpenSearch](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#opensearch) + - [Amazon Neptune](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-neptune) + - [DynamoDB](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#dynamodb) + - [Amazon Timestream](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-timestream) + - [Amazon EMR](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-emr) + - [Amazon CloudWatch Logs](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-cloudwatch-logs) + - [Amazon Chime](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-chime) + - [Amazon 
QuickSight](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-quicksight) + - [AWS STS](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#aws-sts) + - [AWS Secrets Manager](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#aws-secrets-manager) + - [Global Configurations](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#global-configurations) - [**License**](https://github.com/awslabs/aws-data-wrangler/blob/main/LICENSE.txt) - [**Contributing**](https://github.com/awslabs/aws-data-wrangler/blob/main/CONTRIBUTING.md) - [**Legacy Docs** (pre-1.0.0)](https://aws-data-wrangler.readthedocs.io/en/0.3.3/) diff --git a/awswrangler/__metadata__.py b/awswrangler/__metadata__.py index 2302622ee..af3ed9bf6 100644 --- a/awswrangler/__metadata__.py +++ b/awswrangler/__metadata__.py @@ -7,5 +7,5 @@ __title__: str = "awswrangler" __description__: str = "Pandas on AWS." -__version__: str = "2.15.0" +__version__: str = "2.15.1" __license__: str = "Apache License 2.0" diff --git a/awswrangler/athena/_read.py b/awswrangler/athena/_read.py index 71c97ed95..6bf896554 100644 --- a/awswrangler/athena/_read.py +++ b/awswrangler/athena/_read.py @@ -591,11 +591,11 @@ def read_sql_query( **Related tutorial:** - - `Amazon Athena `_ - - `Athena Cache `_ - - `Global Configurations `_ **There are three approaches available through ctas_approach and unload_approach parameters:** @@ -660,7 +660,7 @@ def read_sql_query( /athena.html#Athena.Client.get_query_execution>`_ . For a practical example check out the - `related tutorial `_! @@ -904,11 +904,11 @@ def read_sql_table( **Related tutorial:** - - `Amazon Athena `_ - - `Athena Cache `_ - - `Global Configurations `_ **There are two approaches to be defined through ctas_approach parameter:** @@ -953,7 +953,7 @@ def read_sql_table( /athena.html#Athena.Client.get_query_execution>`_ . For a practical example check out the - `related tutorial `_! diff --git a/awswrangler/s3/_read_parquet.py b/awswrangler/s3/_read_parquet.py index afb340613..84dd873c0 100644 --- a/awswrangler/s3/_read_parquet.py +++ b/awswrangler/s3/_read_parquet.py @@ -849,7 +849,7 @@ def read_parquet_table( This function MUST return a bool, True to read the partition or False to ignore it. Ignored if `dataset=False`. E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False`` - https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/023%20-%20Flexible%20Partitions%20Filter.html + https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/023%20-%20Flexible%20Partitions%20Filter.html columns : List[str], optional Names of columns to read from the file(s). validate_schema: diff --git a/awswrangler/s3/_read_text.py b/awswrangler/s3/_read_text.py index 8caef1664..e36a7f3ae 100644 --- a/awswrangler/s3/_read_text.py +++ b/awswrangler/s3/_read_text.py @@ -241,7 +241,7 @@ def read_csv( This function MUST return a bool, True to read the partition or False to ignore it. Ignored if `dataset=False`. E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False`` - https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/023%20-%20Flexible%20Partitions%20Filter.html + https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/023%20-%20Flexible%20Partitions%20Filter.html pandas_kwargs : KEYWORD arguments forwarded to pandas.read_csv(). You can NOT pass `pandas_kwargs` explicit, just add valid Pandas arguments in the function call and Wrangler will accept it. 
@@ -389,7 +389,7 @@ def read_fwf( This function MUST return a bool, True to read the partition or False to ignore it. Ignored if `dataset=False`. E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False`` - https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/023%20-%20Flexible%20Partitions%20Filter.html + https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/023%20-%20Flexible%20Partitions%20Filter.html pandas_kwargs: KEYWORD arguments forwarded to pandas.read_fwf(). You can NOT pass `pandas_kwargs` explicit, just add valid Pandas arguments in the function call and Wrangler will accept it. @@ -541,7 +541,7 @@ def read_json( This function MUST return a bool, True to read the partition or False to ignore it. Ignored if `dataset=False`. E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False`` - https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/023%20-%20Flexible%20Partitions%20Filter.html + https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/023%20-%20Flexible%20Partitions%20Filter.html pandas_kwargs: KEYWORD arguments forwarded to pandas.read_json(). You can NOT pass `pandas_kwargs` explicit, just add valid Pandas arguments in the function call and Wrangler will accept it. diff --git a/awswrangler/s3/_write_parquet.py b/awswrangler/s3/_write_parquet.py index 897d03818..a3c4ceff4 100644 --- a/awswrangler/s3/_write_parquet.py +++ b/awswrangler/s3/_write_parquet.py @@ -297,18 +297,18 @@ def to_parquet( # pylint: disable=too-many-arguments,too-many-locals,too-many-b concurrent_partitioning: bool If True will increase the parallelism level during the partitions writing. It will decrease the writing time and increase the memory usage. - https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html + https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html mode: str, optional ``append`` (Default), ``overwrite``, ``overwrite_partitions``. Only takes effect if dataset=True. For details check the related tutorial: - https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/004%20-%20Parquet%20Datasets.html + https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/004%20-%20Parquet%20Datasets.html catalog_versioning : bool If True and `mode="overwrite"`, creates an archived version of the table catalog before updating it. schema_evolution : bool If True allows schema evolution (new or missing columns), otherwise a exception will be raised. True by default. (Only considered if dataset=True and mode in ("append", "overwrite_partitions")) Related tutorial: - https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/014%20-%20Schema%20Evolution.html + https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/014%20-%20Schema%20Evolution.html database : str, optional Glue/Athena catalog: Database name. table : str, optional diff --git a/awswrangler/s3/_write_text.py b/awswrangler/s3/_write_text.py index 2684fda3f..f84347ec7 100644 --- a/awswrangler/s3/_write_text.py +++ b/awswrangler/s3/_write_text.py @@ -173,18 +173,18 @@ def to_csv( # pylint: disable=too-many-arguments,too-many-locals,too-many-state concurrent_partitioning: bool If True will increase the parallelism level during the partitions writing. It will decrease the writing time and increase the memory usage. 
- https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html + https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html mode : str, optional ``append`` (Default), ``overwrite``, ``overwrite_partitions``. Only takes effect if dataset=True. For details check the related tutorial: - https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet + https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet catalog_versioning : bool If True and `mode="overwrite"`, creates an archived version of the table catalog before updating it. schema_evolution : bool If True allows schema evolution (new or missing columns), otherwise a exception will be raised. (Only considered if dataset=True and mode in ("append", "overwrite_partitions")). False by default. Related tutorial: - https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/014%20-%20Schema%20Evolution.html + https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/014%20-%20Schema%20Evolution.html database : str, optional Glue/Athena catalog: Database name. table : str, optional @@ -746,18 +746,18 @@ def to_json( # pylint: disable=too-many-arguments,too-many-locals,too-many-stat concurrent_partitioning: bool If True will increase the parallelism level during the partitions writing. It will decrease the writing time and increase the memory usage. - https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html + https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html mode : str, optional ``append`` (Default), ``overwrite``, ``overwrite_partitions``. Only takes effect if dataset=True. For details check the related tutorial: - https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet + https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet catalog_versioning : bool If True and `mode="overwrite"`, creates an archived version of the table catalog before updating it. schema_evolution : bool If True allows schema evolution (new or missing columns), otherwise a exception will be raised. (Only considered if dataset=True and mode in ("append", "overwrite_partitions")) Related tutorial: - https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/014%20-%20Schema%20Evolution.html + https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/014%20-%20Schema%20Evolution.html database : str, optional Glue/Athena catalog: Database name. 
table : str, optional diff --git a/docs/source/install.rst b/docs/source/install.rst index 9828044dd..dbb609338 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -100,7 +100,7 @@ Here is an example of how to create and use the AWS Data Wrangler Lambda layer i "wrangler-layer", location=sam.CfnApplication.ApplicationLocationProperty( application_id="arn:aws:serverlessrepo:us-east-1:336392948345:applications/aws-data-wrangler-layer-py3-8", - semantic_version="2.15.0", # Get the latest version from https://github.com/awslabs/aws-data-wrangler/releases + semantic_version="2.15.1", # Get the latest version from https://github.com/awslabs/aws-data-wrangler/releases ), ) @@ -120,7 +120,7 @@ Here is an example of how to create and use the AWS Data Wrangler Lambda layer i AWS Glue Python Shell Jobs -------------------------- -.. note:: Glue Python Shell runs on Python3.6, for which support was dropped in version 2.15.0 of Wrangler. Please use version 2.14.0 of the library or below. +.. note:: Glue Python Shell runs on Python3.6, for which support was dropped in version 2.15.1 of Wrangler. Please use version 2.14.0 of the library or below. 1 - Go to `GitHub's release page `_ and download the wheel file (.whl) related to the desired version. Alternatively, you can download the wheel from the `public artifacts bucket `_. @@ -144,7 +144,7 @@ Go to your Glue PySpark job and create a new *Job parameters* key/value: To install a specific version, set the value for the above Job parameter as follows: -* Value: ``cython==0.29.21,pg8000==1.21.0,pyarrow==2,pandas==1.3.0,awswrangler==2.15.0`` +* Value: ``cython==0.29.21,pg8000==1.21.0,pyarrow==2,pandas==1.3.0,awswrangler==2.15.1`` .. note:: Pyarrow 3 is not currently supported in Glue PySpark Jobs, which is why an installation of pyarrow 2 is required. @@ -163,7 +163,7 @@ Lambda zipped layers and Python wheels are stored in a publicly accessible S3 bu * Python wheel: ``awswrangler--py3-none-any.whl`` -For example: ``s3://aws-data-wrangler-public-artifacts/releases/2.15.0/awswrangler-layer-2.15.0-py3.8.zip`` +For example: ``s3://aws-data-wrangler-public-artifacts/releases/2.15.1/awswrangler-layer-2.15.1-py3.8.zip`` Amazon SageMaker Notebook ------------------------- @@ -253,7 +253,7 @@ Despite not being a distributed library, AWS Data Wrangler could be used to comp sudo pip install pyarrow==2 awswrangler .. note:: Make sure to freeze the library version in the bootstrap for production - environments (e.g. awswrangler==2.15.0) + environments (e.g. awswrangler==2.15.1) .. note:: Pyarrow 3 is not currently supported in the default EMR image, which is why an installation of pyarrow 2 is required. diff --git a/docs/source/what.rst b/docs/source/what.rst index 7de0b2b24..ace477701 100644 --- a/docs/source/what.rst +++ b/docs/source/what.rst @@ -8,4 +8,4 @@ DynamoDB, EMR, SecretManager, PostgreSQL, MySQL, SQLServer and S3 (Parquet, CSV, Built on top of other open-source projects like `Pandas `_, `Apache Arrow `_ and `Boto3 `_, it offers abstracted functions to execute usual ETL tasks like load/unload data from **Data Lakes**, **Data Warehouses** and **Databases**. -Check our `tutorials `_ or the `list of functionalities `_. \ No newline at end of file +Check our `tutorials `_ or the `list of functionalities `_. 
\ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 3ad8794b2..3dfc24edf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "awswrangler" -version = "2.15.0" +version = "2.15.1" description = "Pandas on AWS." authors = ["Igor Tavares"] license = "Apache License 2.0" diff --git a/test_infra/pyproject.toml b/test_infra/pyproject.toml index 292e98fed..c3384a88a 100644 --- a/test_infra/pyproject.toml +++ b/test_infra/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "awswrangler - test infrastructure" -version = "2.15.0" +version = "2.15.1" description = "CDK test infrastructure for AWS" authors = ["Amazon Web Services"] license = "Apache License 2.0" diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 76a8a1b84..c610c8cb8 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -2,7 +2,7 @@ def test_metadata(): - assert wr.__version__ == "2.15.0" + assert wr.__version__ == "2.15.1" assert wr.__title__ == "awswrangler" assert wr.__description__ == "Pandas on AWS." assert wr.__license__ == "Apache License 2.0" diff --git a/tutorials/001 - Introduction.ipynb b/tutorials/001 - Introduction.ipynb index 745216d9a..cc3b81a42 100644 --- a/tutorials/001 - Introduction.ipynb +++ b/tutorials/001 - Introduction.ipynb @@ -19,7 +19,7 @@ "\n", "Built on top of other open-source projects like [Pandas](https://github.com/pandas-dev/pandas), [Apache Arrow](https://github.com/apache/arrow) and [Boto3](https://github.com/boto/boto3), it offers abstracted functions to execute usual ETL tasks like load/unload data from **Data Lakes**, **Data Warehouses** and **Databases**.\n", "\n", - "Check our [list of functionalities](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html)." + "Check our [list of functionalities](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html)." 
] }, { @@ -30,15 +30,15 @@ "\n", "Wrangler runs almost anywhere over Python 3.7, 3.8, 3.9 and 3.10, so there are several different ways to install it in the desired environment.\n", "\n", - " - [PyPi (pip)](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#pypi-pip)\n", - " - [Conda](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#conda)\n", - " - [AWS Lambda Layer](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#aws-lambda-layer)\n", - " - [AWS Glue Python Shell Jobs](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#aws-glue-python-shell-jobs)\n", - " - [AWS Glue PySpark Jobs](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#aws-glue-pyspark-jobs)\n", - " - [Amazon SageMaker Notebook](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#amazon-sagemaker-notebook)\n", - " - [Amazon SageMaker Notebook Lifecycle](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#amazon-sagemaker-notebook-lifecycle)\n", - " - [EMR Cluster](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#emr-cluster)\n", - " - [From source](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#from-source)\n", + " - [PyPi (pip)](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#pypi-pip)\n", + " - [Conda](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#conda)\n", + " - [AWS Lambda Layer](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#aws-lambda-layer)\n", + " - [AWS Glue Python Shell Jobs](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#aws-glue-python-shell-jobs)\n", + " - [AWS Glue PySpark Jobs](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#aws-glue-pyspark-jobs)\n", + " - [Amazon SageMaker Notebook](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#amazon-sagemaker-notebook)\n", + " - [Amazon SageMaker Notebook Lifecycle](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#amazon-sagemaker-notebook-lifecycle)\n", + " - [EMR Cluster](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#emr-cluster)\n", + " - [From source](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#from-source)\n", "\n", "Some good practices for most of the above methods are:\n", " - Use new and individual Virtual Environments for each project ([venv](https://docs.python.org/3/library/venv.html))\n", diff --git a/tutorials/007 - Redshift, MySQL, PostgreSQL, SQL Server.ipynb b/tutorials/007 - Redshift, MySQL, PostgreSQL, SQL Server.ipynb index e56e5f29b..ba7d500f7 100644 --- a/tutorials/007 - Redshift, MySQL, PostgreSQL, SQL Server.ipynb +++ b/tutorials/007 - Redshift, MySQL, PostgreSQL, SQL Server.ipynb @@ -10,14 +10,14 @@ "\n", "[Wrangler](https://github.com/awslabs/aws-data-wrangler)'s Redshift, MySQL and PostgreSQL have two basic function in common that tries to follow the Pandas conventions, but add more data type consistency.\n", "\n", - "- [wr.redshift.to_sql()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.redshift.to_sql.html)\n", - "- [wr.redshift.read_sql_query()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.redshift.read_sql_query.html)\n", - "- [wr.mysql.to_sql()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.mysql.to_sql.html)\n", - "- [wr.mysql.read_sql_query()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.mysql.read_sql_query.html)\n", - "- 
[wr.postgresql.to_sql()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.postgresql.to_sql.html)\n", - "- [wr.postgresql.read_sql_query()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.postgresql.read_sql_query.html)\n", - "- [wr.sqlserver.to_sql()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.sqlserver.to_sql.html)\n", - "- [wr.sqlserver.read_sql_query()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.sqlserver.read_sql_query.html)" + "- [wr.redshift.to_sql()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.redshift.to_sql.html)\n", + "- [wr.redshift.read_sql_query()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.redshift.read_sql_query.html)\n", + "- [wr.mysql.to_sql()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.mysql.to_sql.html)\n", + "- [wr.mysql.read_sql_query()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.mysql.read_sql_query.html)\n", + "- [wr.postgresql.to_sql()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.postgresql.to_sql.html)\n", + "- [wr.postgresql.read_sql_query()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.postgresql.read_sql_query.html)\n", + "- [wr.sqlserver.to_sql()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.sqlserver.to_sql.html)\n", + "- [wr.sqlserver.read_sql_query()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.sqlserver.read_sql_query.html)" ] }, { @@ -41,10 +41,10 @@ "source": [ "## Connect using the Glue Catalog Connections\n", "\n", - "- [wr.redshift.connect()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.redshift.connect.html)\n", - "- [wr.mysql.connect()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.mysql.connect.html)\n", - "- [wr.postgresql.connect()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.postgresql.connect.html)\n", - "- [wr.sqlserver.connect()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.sqlserver.connect.html)" + "- [wr.redshift.connect()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.redshift.connect.html)\n", + "- [wr.mysql.connect()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.mysql.connect.html)\n", + "- [wr.postgresql.connect()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.postgresql.connect.html)\n", + "- [wr.sqlserver.connect()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.sqlserver.connect.html)" ] }, { diff --git a/tutorials/014 - Schema Evolution.ipynb b/tutorials/014 - Schema Evolution.ipynb index ec0c6628c..ab4661d86 100644 --- a/tutorials/014 - Schema Evolution.ipynb +++ b/tutorials/014 - Schema Evolution.ipynb @@ -10,9 +10,9 @@ "\n", "Wrangler support new **columns** on Parquet and CSV datasets through:\n", "\n", - "- [wr.s3.to_parquet()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet)\n", - "- [wr.s3.store_parquet_metadata()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.s3.store_parquet_metadata.html#awswrangler.s3.store_parquet_metadata) i.e. 
\"Crawler\"\n", - "- [wr.s3.to_csv()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.s3.to_csv.html#awswrangler.s3.to_csv)" + "- [wr.s3.to_parquet()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet)\n", + "- [wr.s3.store_parquet_metadata()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.s3.store_parquet_metadata.html#awswrangler.s3.store_parquet_metadata) i.e. \"Crawler\"\n", + "- [wr.s3.to_csv()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.s3.to_csv.html#awswrangler.s3.to_csv)" ] }, { diff --git a/tutorials/021 - Global Configurations.ipynb b/tutorials/021 - Global Configurations.ipynb index 352a3b5ce..4502facaf 100644 --- a/tutorials/021 - Global Configurations.ipynb +++ b/tutorials/021 - Global Configurations.ipynb @@ -13,7 +13,7 @@ "- **Environment variables**\n", "- **wr.config**\n", "\n", - "*P.S. Check the [function API doc](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html) to see if your function has some argument that can be configured through Global configurations.*\n", + "*P.S. Check the [function API doc](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html) to see if your function has some argument that can be configured through Global configurations.*\n", "\n", "*P.P.S. One exception to the above mentioned rules is the `botocore_config` property. It cannot be set through environment variables\n", "but only via `wr.config`. It will be used as the `botocore.config.Config` for all underlying `boto3` calls.\n", diff --git a/tutorials/022 - Writing Partitions Concurrently.ipynb b/tutorials/022 - Writing Partitions Concurrently.ipynb index 8ed003674..87964c827 100644 --- a/tutorials/022 - Writing Partitions Concurrently.ipynb +++ b/tutorials/022 - Writing Partitions Concurrently.ipynb @@ -13,7 +13,7 @@ " If True will increase the parallelism level during the partitions writing. It will decrease the\n", " writing time and increase the memory usage.\n", "\n", - "*P.S. Check the [function API doc](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html) to see it has some argument that can be configured through Global configurations.*" + "*P.S. Check the [function API doc](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html) to see it has some argument that can be configured through Global configurations.*" ] }, { diff --git a/tutorials/023 - Flexible Partitions Filter.ipynb b/tutorials/023 - Flexible Partitions Filter.ipynb index 8e19ae11e..9e34a1126 100644 --- a/tutorials/023 - Flexible Partitions Filter.ipynb +++ b/tutorials/023 - Flexible Partitions Filter.ipynb @@ -16,7 +16,7 @@ " - Ignored if `dataset=False`.\n", " \n", "\n", - "*P.S. Check the [function API doc](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html) to see it has some argument that can be configured through Global configurations.*" + "*P.S. Check the [function API doc](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html) to see it has some argument that can be configured through Global configurations.*" ] }, { From b2b3c8100bb59c42cf7bc4d7ee717c725f88934b Mon Sep 17 00:00:00 2001 From: Brannon Imamura Date: Tue, 5 Apr 2022 14:30:20 -0500 Subject: [PATCH 3/4] Revert "bump vers" This reverts commit 545aaba822d24741a3887a00f928b950a2f0891c. 
--- .bumpversion.cfg | 2 +- CONTRIBUTING_COMMON_ERRORS.md | 6 +- README.md | 72 +++++++++---------- awswrangler/__metadata__.py | 2 +- awswrangler/athena/_read.py | 16 ++--- awswrangler/s3/_read_parquet.py | 2 +- awswrangler/s3/_read_text.py | 6 +- awswrangler/s3/_write_parquet.py | 6 +- awswrangler/s3/_write_text.py | 12 ++-- docs/source/install.rst | 10 +-- docs/source/what.rst | 2 +- pyproject.toml | 2 +- test_infra/pyproject.toml | 2 +- tests/test_metadata.py | 2 +- tutorials/001 - Introduction.ipynb | 20 +++--- ...shift, MySQL, PostgreSQL, SQL Server.ipynb | 24 +++---- tutorials/014 - Schema Evolution.ipynb | 6 +- tutorials/021 - Global Configurations.ipynb | 2 +- ...22 - Writing Partitions Concurrently.ipynb | 2 +- .../023 - Flexible Partitions Filter.ipynb | 2 +- 20 files changed, 99 insertions(+), 99 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 1c951af81..380e4da76 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 2.15.1 +current_version = 2.15.0 commit = False tag = False tag_name = {new_version} diff --git a/CONTRIBUTING_COMMON_ERRORS.md b/CONTRIBUTING_COMMON_ERRORS.md index 0626df86c..2f69565de 100644 --- a/CONTRIBUTING_COMMON_ERRORS.md +++ b/CONTRIBUTING_COMMON_ERRORS.md @@ -13,9 +13,9 @@ Requirement already satisfied: pbr!=2.1.0,>=2.0.0 in ./.venv/lib/python3.7/site- Using legacy 'setup.py install' for python-Levenshtein, since package 'wheel' is not installed. Installing collected packages: awswrangler, python-Levenshtein Attempting uninstall: awswrangler - Found existing installation: awswrangler 2.15.1 - Uninstalling awswrangler-2.15.1: - Successfully uninstalled awswrangler-2.15.1 + Found existing installation: awswrangler 2.15.0 + Uninstalling awswrangler-2.15.0: + Successfully uninstalled awswrangler-2.15.0 Running setup.py develop for awswrangler Running setup.py install for python-Levenshtein ... error ERROR: Command errored out with exit status 1: diff --git a/README.md b/README.md index 446dcac05..0020accad 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Easy integration with Athena, Glue, Redshift, Timestream, OpenSearch, Neptune, Q > An [AWS Professional Service](https://aws.amazon.com/professional-services/) open source initiative | aws-proserve-opensource@amazon.com -[![Release](https://img.shields.io/badge/release-2.15.1-brightgreen.svg)](https://pypi.org/project/awswrangler/) +[![Release](https://img.shields.io/badge/release-2.15.0-brightgreen.svg)](https://pypi.org/project/awswrangler/) [![Python Version](https://img.shields.io/badge/python-3.7%20%7C%203.7%20%7C%203.8%20%7C%203.10-brightgreen.svg)](https://anaconda.org/conda-forge/awswrangler) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) @@ -23,7 +23,7 @@ Easy integration with Athena, Glue, Redshift, Timestream, OpenSearch, Neptune, Q | **[PyPi](https://pypi.org/project/awswrangler/)** | [![PyPI Downloads](https://pepy.tech/badge/awswrangler)](https://pypi.org/project/awswrangler/) | `pip install awswrangler` | | **[Conda](https://anaconda.org/conda-forge/awswrangler)** | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/awswrangler.svg)](https://anaconda.org/conda-forge/awswrangler) | `conda install -c conda-forge awswrangler` | -> ⚠️ **For platforms without PyArrow 3 support (e.g. 
[EMR](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#emr-cluster), [Glue PySpark Job](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#aws-glue-pyspark-jobs), MWAA):**
+> ⚠️ **For platforms without PyArrow 3 support (e.g. [EMR](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#emr-cluster), [Glue PySpark Job](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#aws-glue-pyspark-jobs), MWAA):**
➡️ `pip install pyarrow==2 awswrangler` Powered By [](https://arrow.apache.org/powered_by/) @@ -42,7 +42,7 @@ Powered By [](http Installation command: `pip install awswrangler` -> ⚠️ **For platforms without PyArrow 3 support (e.g. [EMR](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#emr-cluster), [Glue PySpark Job](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#aws-glue-pyspark-jobs), MWAA):**
+> ⚠️ **For platforms without PyArrow 3 support (e.g. [EMR](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#emr-cluster), [Glue PySpark Job](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#aws-glue-pyspark-jobs), MWAA):**
➡️`pip install pyarrow==2 awswrangler` ```py3 @@ -96,17 +96,17 @@ FROM "sampleDB"."sampleTable" ORDER BY time DESC LIMIT 3 ## [Read The Docs](https://aws-data-wrangler.readthedocs.io/) -- [**What is AWS Data Wrangler?**](https://aws-data-wrangler.readthedocs.io/en/2.15.1/what.html) -- [**Install**](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html) - - [PyPi (pip)](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#pypi-pip) - - [Conda](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#conda) - - [AWS Lambda Layer](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#aws-lambda-layer) - - [AWS Glue Python Shell Jobs](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#aws-glue-python-shell-jobs) - - [AWS Glue PySpark Jobs](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#aws-glue-pyspark-jobs) - - [Amazon SageMaker Notebook](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#amazon-sagemaker-notebook) - - [Amazon SageMaker Notebook Lifecycle](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#amazon-sagemaker-notebook-lifecycle) - - [EMR](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#emr) - - [From source](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#from-source) +- [**What is AWS Data Wrangler?**](https://aws-data-wrangler.readthedocs.io/en/2.15.0/what.html) +- [**Install**](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html) + - [PyPi (pip)](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#pypi-pip) + - [Conda](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#conda) + - [AWS Lambda Layer](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#aws-lambda-layer) + - [AWS Glue Python Shell Jobs](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#aws-glue-python-shell-jobs) + - [AWS Glue PySpark Jobs](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#aws-glue-pyspark-jobs) + - [Amazon SageMaker Notebook](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#amazon-sagemaker-notebook) + - [Amazon SageMaker Notebook Lifecycle](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#amazon-sagemaker-notebook-lifecycle) + - [EMR](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#emr) + - [From source](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#from-source) - [**Tutorials**](https://github.com/awslabs/aws-data-wrangler/tree/main/tutorials) - [001 - Introduction](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/001%20-%20Introduction.ipynb) - [002 - Sessions](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/002%20-%20Sessions.ipynb) @@ -141,28 +141,28 @@ FROM "sampleDB"."sampleTable" ORDER BY time DESC LIMIT 3 - [031 - OpenSearch](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/031%20-%20OpenSearch.ipynb) - [032 - Lake Formation Governed Tables](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/032%20-%20Lake%20Formation%20Governed%20Tables.ipynb) - [033 - Amazon Neptune](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/033%20-%20Amazon%20Neptune.ipynb) -- [**API Reference**](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html) - - [Amazon S3](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-s3) - - [AWS Glue Catalog](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#aws-glue-catalog) - - [Amazon 
Athena](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-athena) - - [AWS Lake Formation](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#aws-lake-formation) - - [Amazon Redshift](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-redshift) - - [PostgreSQL](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#postgresql) - - [MySQL](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#mysql) - - [SQL Server](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#sqlserver) - - [Data API Redshift](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#data-api-redshift) - - [Data API RDS](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#data-api-rds) - - [OpenSearch](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#opensearch) - - [Amazon Neptune](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-neptune) - - [DynamoDB](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#dynamodb) - - [Amazon Timestream](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-timestream) - - [Amazon EMR](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-emr) - - [Amazon CloudWatch Logs](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-cloudwatch-logs) - - [Amazon Chime](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-chime) - - [Amazon QuickSight](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#amazon-quicksight) - - [AWS STS](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#aws-sts) - - [AWS Secrets Manager](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#aws-secrets-manager) - - [Global Configurations](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#global-configurations) +- [**API Reference**](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html) + - [Amazon S3](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-s3) + - [AWS Glue Catalog](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#aws-glue-catalog) + - [Amazon Athena](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-athena) + - [AWS Lake Formation](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#aws-lake-formation) + - [Amazon Redshift](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-redshift) + - [PostgreSQL](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#postgresql) + - [MySQL](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#mysql) + - [SQL Server](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#sqlserver) + - [Data API Redshift](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#data-api-redshift) + - [Data API RDS](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#data-api-rds) + - [OpenSearch](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#opensearch) + - [Amazon Neptune](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-neptune) + - [DynamoDB](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#dynamodb) + - [Amazon Timestream](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-timestream) + - [Amazon EMR](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-emr) + - [Amazon CloudWatch Logs](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-cloudwatch-logs) + - [Amazon Chime](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-chime) + - [Amazon 
QuickSight](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#amazon-quicksight) + - [AWS STS](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#aws-sts) + - [AWS Secrets Manager](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#aws-secrets-manager) + - [Global Configurations](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html#global-configurations) - [**License**](https://github.com/awslabs/aws-data-wrangler/blob/main/LICENSE.txt) - [**Contributing**](https://github.com/awslabs/aws-data-wrangler/blob/main/CONTRIBUTING.md) - [**Legacy Docs** (pre-1.0.0)](https://aws-data-wrangler.readthedocs.io/en/0.3.3/) diff --git a/awswrangler/__metadata__.py b/awswrangler/__metadata__.py index af3ed9bf6..2302622ee 100644 --- a/awswrangler/__metadata__.py +++ b/awswrangler/__metadata__.py @@ -7,5 +7,5 @@ __title__: str = "awswrangler" __description__: str = "Pandas on AWS." -__version__: str = "2.15.1" +__version__: str = "2.15.0" __license__: str = "Apache License 2.0" diff --git a/awswrangler/athena/_read.py b/awswrangler/athena/_read.py index 6bf896554..71c97ed95 100644 --- a/awswrangler/athena/_read.py +++ b/awswrangler/athena/_read.py @@ -591,11 +591,11 @@ def read_sql_query( **Related tutorial:** - - `Amazon Athena `_ - - `Athena Cache `_ - - `Global Configurations `_ **There are three approaches available through ctas_approach and unload_approach parameters:** @@ -660,7 +660,7 @@ def read_sql_query( /athena.html#Athena.Client.get_query_execution>`_ . For a practical example check out the - `related tutorial `_! @@ -904,11 +904,11 @@ def read_sql_table( **Related tutorial:** - - `Amazon Athena `_ - - `Athena Cache `_ - - `Global Configurations `_ **There are two approaches to be defined through ctas_approach parameter:** @@ -953,7 +953,7 @@ def read_sql_table( /athena.html#Athena.Client.get_query_execution>`_ . For a practical example check out the - `related tutorial `_! diff --git a/awswrangler/s3/_read_parquet.py b/awswrangler/s3/_read_parquet.py index 84dd873c0..afb340613 100644 --- a/awswrangler/s3/_read_parquet.py +++ b/awswrangler/s3/_read_parquet.py @@ -849,7 +849,7 @@ def read_parquet_table( This function MUST return a bool, True to read the partition or False to ignore it. Ignored if `dataset=False`. E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False`` - https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/023%20-%20Flexible%20Partitions%20Filter.html + https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/023%20-%20Flexible%20Partitions%20Filter.html columns : List[str], optional Names of columns to read from the file(s). validate_schema: diff --git a/awswrangler/s3/_read_text.py b/awswrangler/s3/_read_text.py index e36a7f3ae..8caef1664 100644 --- a/awswrangler/s3/_read_text.py +++ b/awswrangler/s3/_read_text.py @@ -241,7 +241,7 @@ def read_csv( This function MUST return a bool, True to read the partition or False to ignore it. Ignored if `dataset=False`. E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False`` - https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/023%20-%20Flexible%20Partitions%20Filter.html + https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/023%20-%20Flexible%20Partitions%20Filter.html pandas_kwargs : KEYWORD arguments forwarded to pandas.read_csv(). You can NOT pass `pandas_kwargs` explicit, just add valid Pandas arguments in the function call and Wrangler will accept it. 
@@ -389,7 +389,7 @@ def read_fwf( This function MUST return a bool, True to read the partition or False to ignore it. Ignored if `dataset=False`. E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False`` - https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/023%20-%20Flexible%20Partitions%20Filter.html + https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/023%20-%20Flexible%20Partitions%20Filter.html pandas_kwargs: KEYWORD arguments forwarded to pandas.read_fwf(). You can NOT pass `pandas_kwargs` explicit, just add valid Pandas arguments in the function call and Wrangler will accept it. @@ -541,7 +541,7 @@ def read_json( This function MUST return a bool, True to read the partition or False to ignore it. Ignored if `dataset=False`. E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False`` - https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/023%20-%20Flexible%20Partitions%20Filter.html + https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/023%20-%20Flexible%20Partitions%20Filter.html pandas_kwargs: KEYWORD arguments forwarded to pandas.read_json(). You can NOT pass `pandas_kwargs` explicit, just add valid Pandas arguments in the function call and Wrangler will accept it. diff --git a/awswrangler/s3/_write_parquet.py b/awswrangler/s3/_write_parquet.py index a3c4ceff4..897d03818 100644 --- a/awswrangler/s3/_write_parquet.py +++ b/awswrangler/s3/_write_parquet.py @@ -297,18 +297,18 @@ def to_parquet( # pylint: disable=too-many-arguments,too-many-locals,too-many-b concurrent_partitioning: bool If True will increase the parallelism level during the partitions writing. It will decrease the writing time and increase the memory usage. - https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html + https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html mode: str, optional ``append`` (Default), ``overwrite``, ``overwrite_partitions``. Only takes effect if dataset=True. For details check the related tutorial: - https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/004%20-%20Parquet%20Datasets.html + https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/004%20-%20Parquet%20Datasets.html catalog_versioning : bool If True and `mode="overwrite"`, creates an archived version of the table catalog before updating it. schema_evolution : bool If True allows schema evolution (new or missing columns), otherwise a exception will be raised. True by default. (Only considered if dataset=True and mode in ("append", "overwrite_partitions")) Related tutorial: - https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/014%20-%20Schema%20Evolution.html + https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/014%20-%20Schema%20Evolution.html database : str, optional Glue/Athena catalog: Database name. table : str, optional diff --git a/awswrangler/s3/_write_text.py b/awswrangler/s3/_write_text.py index f84347ec7..2684fda3f 100644 --- a/awswrangler/s3/_write_text.py +++ b/awswrangler/s3/_write_text.py @@ -173,18 +173,18 @@ def to_csv( # pylint: disable=too-many-arguments,too-many-locals,too-many-state concurrent_partitioning: bool If True will increase the parallelism level during the partitions writing. It will decrease the writing time and increase the memory usage. 
- https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html + https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html mode : str, optional ``append`` (Default), ``overwrite``, ``overwrite_partitions``. Only takes effect if dataset=True. For details check the related tutorial: - https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet + https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet catalog_versioning : bool If True and `mode="overwrite"`, creates an archived version of the table catalog before updating it. schema_evolution : bool If True allows schema evolution (new or missing columns), otherwise a exception will be raised. (Only considered if dataset=True and mode in ("append", "overwrite_partitions")). False by default. Related tutorial: - https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/014%20-%20Schema%20Evolution.html + https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/014%20-%20Schema%20Evolution.html database : str, optional Glue/Athena catalog: Database name. table : str, optional @@ -746,18 +746,18 @@ def to_json( # pylint: disable=too-many-arguments,too-many-locals,too-many-stat concurrent_partitioning: bool If True will increase the parallelism level during the partitions writing. It will decrease the writing time and increase the memory usage. - https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html + https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html mode : str, optional ``append`` (Default), ``overwrite``, ``overwrite_partitions``. Only takes effect if dataset=True. For details check the related tutorial: - https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet + https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet catalog_versioning : bool If True and `mode="overwrite"`, creates an archived version of the table catalog before updating it. schema_evolution : bool If True allows schema evolution (new or missing columns), otherwise a exception will be raised. (Only considered if dataset=True and mode in ("append", "overwrite_partitions")) Related tutorial: - https://aws-data-wrangler.readthedocs.io/en/2.15.1/tutorials/014%20-%20Schema%20Evolution.html + https://aws-data-wrangler.readthedocs.io/en/2.15.0/tutorials/014%20-%20Schema%20Evolution.html database : str, optional Glue/Athena catalog: Database name. 
diff --git a/docs/source/install.rst b/docs/source/install.rst
index dbb609338..9828044dd 100644
--- a/docs/source/install.rst
+++ b/docs/source/install.rst
@@ -100,7 +100,7 @@ Here is an example of how to create and use the AWS Data Wrangler Lambda layer i
         "wrangler-layer",
         location=sam.CfnApplication.ApplicationLocationProperty(
             application_id="arn:aws:serverlessrepo:us-east-1:336392948345:applications/aws-data-wrangler-layer-py3-8",
-            semantic_version="2.15.0",  # Get the latest version from https://github.com/awslabs/aws-data-wrangler/releases
+            semantic_version="2.15.1",  # Get the latest version from https://github.com/awslabs/aws-data-wrangler/releases
         ),
     )

@@ -120,7 +120,7 @@ Here is an example of how to create and use the AWS Data Wrangler Lambda layer i
 AWS Glue Python Shell Jobs
 --------------------------

-.. note:: Glue Python Shell runs on Python3.6, for which support was dropped in version 2.15.0 of Wrangler. Please use version 2.14.0 of the library or below.
+.. note:: Glue Python Shell runs on Python3.6, for which support was dropped in version 2.15.1 of Wrangler. Please use version 2.14.0 of the library or below.

 1 - Go to `GitHub's release page <https://github.com/awslabs/aws-data-wrangler/releases>`_ and download the wheel file (.whl) related to the desired version. Alternatively, you can download the wheel from the `public artifacts bucket `_.

@@ -144,7 +144,7 @@ Go to your Glue PySpark job and create a new *Job parameters* key/value:

 To install a specific version, set the value for the above Job parameter as follows:

-* Value: ``cython==0.29.21,pg8000==1.21.0,pyarrow==2,pandas==1.3.0,awswrangler==2.15.0``
+* Value: ``cython==0.29.21,pg8000==1.21.0,pyarrow==2,pandas==1.3.0,awswrangler==2.15.1``

 .. note:: Pyarrow 3 is not currently supported in Glue PySpark Jobs, which is why an installation of pyarrow 2 is required.

@@ -163,7 +163,7 @@ Lambda zipped layers and Python wheels are stored in a publicly accessible S3 bu

 * Python wheel: ``awswrangler-<version>-py3-none-any.whl``

-For example: ``s3://aws-data-wrangler-public-artifacts/releases/2.15.0/awswrangler-layer-2.15.0-py3.8.zip``
+For example: ``s3://aws-data-wrangler-public-artifacts/releases/2.15.1/awswrangler-layer-2.15.1-py3.8.zip``

 Amazon SageMaker Notebook
 -------------------------
@@ -253,7 +253,7 @@ Despite not being a distributed library, AWS Data Wrangler could be used to comp
     sudo pip install pyarrow==2 awswrangler

 .. note:: Make sure to freeze the library version in the bootstrap for production
-    environments (e.g. awswrangler==2.15.0)
+    environments (e.g. awswrangler==2.15.1)

 .. note:: Pyarrow 3 is not currently supported in the default EMR image,
     which is why an installation of pyarrow 2 is required.
diff --git a/docs/source/what.rst b/docs/source/what.rst
index ace477701..7de0b2b24 100644
--- a/docs/source/what.rst
+++ b/docs/source/what.rst
@@ -8,4 +8,4 @@ DynamoDB, EMR, SecretManager, PostgreSQL, MySQL, SQLServer and S3 (Parquet, CSV,
 Built on top of other open-source projects like `Pandas <https://github.com/pandas-dev/pandas>`_, `Apache Arrow <https://github.com/apache/arrow>`_ and `Boto3 <https://github.com/boto/boto3>`_,
 it offers abstracted functions to execute usual ETL tasks like load/unload data from **Data Lakes**, **Data Warehouses** and **Databases**.

-Check our `tutorials <https://github.com/awslabs/aws-data-wrangler/tree/main/tutorials>`_ or the `list of functionalities <https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html>`_.
\ No newline at end of file
+Check our `tutorials <https://github.com/awslabs/aws-data-wrangler/tree/main/tutorials>`_ or the `list of functionalities <https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html>`_.
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 3dfc24edf..3ad8794b2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "awswrangler"
-version = "2.15.0"
+version = "2.15.1"
 description = "Pandas on AWS."
 authors = ["Igor Tavares"]
 license = "Apache License 2.0"
diff --git a/test_infra/pyproject.toml b/test_infra/pyproject.toml
index c3384a88a..292e98fed 100644
--- a/test_infra/pyproject.toml
+++ b/test_infra/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "awswrangler - test infrastructure"
-version = "2.15.0"
+version = "2.15.1"
 description = "CDK test infrastructure for AWS"
 authors = ["Amazon Web Services"]
 license = "Apache License 2.0"
diff --git a/tests/test_metadata.py b/tests/test_metadata.py
index c610c8cb8..76a8a1b84 100644
--- a/tests/test_metadata.py
+++ b/tests/test_metadata.py
@@ -2,7 +2,7 @@


 def test_metadata():
-    assert wr.__version__ == "2.15.0"
+    assert wr.__version__ == "2.15.1"
     assert wr.__title__ == "awswrangler"
     assert wr.__description__ == "Pandas on AWS."
     assert wr.__license__ == "Apache License 2.0"
diff --git a/tutorials/001 - Introduction.ipynb b/tutorials/001 - Introduction.ipynb
index cc3b81a42..745216d9a 100644
--- a/tutorials/001 - Introduction.ipynb
+++ b/tutorials/001 - Introduction.ipynb
@@ -19,7 +19,7 @@
     "\n",
     "Built on top of other open-source projects like [Pandas](https://github.com/pandas-dev/pandas), [Apache Arrow](https://github.com/apache/arrow) and [Boto3](https://github.com/boto/boto3), it offers abstracted functions to execute usual ETL tasks like load/unload data from **Data Lakes**, **Data Warehouses** and **Databases**.\n",
     "\n",
-    "Check our [list of functionalities](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html)."
+    "Check our [list of functionalities](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html)."
   ]
  },
 {
@@ -30,15 +30,15 @@
     "\n",
     "Wrangler runs almost anywhere over Python 3.7, 3.8, 3.9 and 3.10, so there are several different ways to install it in the desired environment.\n",
     "\n",
-    "  - [PyPi (pip)](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#pypi-pip)\n",
-    "  - [Conda](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#conda)\n",
-    "  - [AWS Lambda Layer](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#aws-lambda-layer)\n",
-    "  - [AWS Glue Python Shell Jobs](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#aws-glue-python-shell-jobs)\n",
-    "  - [AWS Glue PySpark Jobs](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#aws-glue-pyspark-jobs)\n",
-    "  - [Amazon SageMaker Notebook](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#amazon-sagemaker-notebook)\n",
-    "  - [Amazon SageMaker Notebook Lifecycle](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#amazon-sagemaker-notebook-lifecycle)\n",
-    "  - [EMR Cluster](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#emr-cluster)\n",
-    "  - [From source](https://aws-data-wrangler.readthedocs.io/en/2.15.0/install.html#from-source)\n",
+    "  - [PyPi (pip)](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#pypi-pip)\n",
+    "  - [Conda](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#conda)\n",
+    "  - [AWS Lambda Layer](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#aws-lambda-layer)\n",
+    "  - [AWS Glue Python Shell Jobs](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#aws-glue-python-shell-jobs)\n",
+    "  - [AWS Glue PySpark Jobs](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#aws-glue-pyspark-jobs)\n",
+    "  - [Amazon SageMaker Notebook](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#amazon-sagemaker-notebook)\n",
+    "  - [Amazon SageMaker Notebook Lifecycle](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#amazon-sagemaker-notebook-lifecycle)\n",
+    "  - [EMR Cluster](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#emr-cluster)\n",
+    "  - [From source](https://aws-data-wrangler.readthedocs.io/en/2.15.1/install.html#from-source)\n",
     "\n",
     "Some good practices for most of the above methods are:\n",
     "  - Use new and individual Virtual Environments for each project ([venv](https://docs.python.org/3/library/venv.html))\n",
diff --git a/tutorials/007 - Redshift, MySQL, PostgreSQL, SQL Server.ipynb b/tutorials/007 - Redshift, MySQL, PostgreSQL, SQL Server.ipynb
index ba7d500f7..e56e5f29b 100644
--- a/tutorials/007 - Redshift, MySQL, PostgreSQL, SQL Server.ipynb
+++ b/tutorials/007 - Redshift, MySQL, PostgreSQL, SQL Server.ipynb
@@ -10,14 +10,14 @@
     "\n",
     "[Wrangler](https://github.com/awslabs/aws-data-wrangler)'s Redshift, MySQL and PostgreSQL have two basic function in common that tries to follow the Pandas conventions, but add more data type consistency.\n",
     "\n",
-    "- [wr.redshift.to_sql()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.redshift.to_sql.html)\n",
-    "- [wr.redshift.read_sql_query()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.redshift.read_sql_query.html)\n",
-    "- [wr.mysql.to_sql()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.mysql.to_sql.html)\n",
-    "- [wr.mysql.read_sql_query()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.mysql.read_sql_query.html)\n",
-    "- [wr.postgresql.to_sql()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.postgresql.to_sql.html)\n",
-    "- [wr.postgresql.read_sql_query()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.postgresql.read_sql_query.html)\n",
-    "- [wr.sqlserver.to_sql()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.sqlserver.to_sql.html)\n",
-    "- [wr.sqlserver.read_sql_query()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.sqlserver.read_sql_query.html)"
+    "- [wr.redshift.to_sql()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.redshift.to_sql.html)\n",
+    "- [wr.redshift.read_sql_query()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.redshift.read_sql_query.html)\n",
+    "- [wr.mysql.to_sql()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.mysql.to_sql.html)\n",
+    "- [wr.mysql.read_sql_query()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.mysql.read_sql_query.html)\n",
+    "- [wr.postgresql.to_sql()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.postgresql.to_sql.html)\n",
+    "- [wr.postgresql.read_sql_query()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.postgresql.read_sql_query.html)\n",
+    "- [wr.sqlserver.to_sql()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.sqlserver.to_sql.html)\n",
+    "- [wr.sqlserver.read_sql_query()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.sqlserver.read_sql_query.html)"
   ]
  },
 {
@@ -41,10 +41,10 @@
    "source": [
     "## Connect using the Glue Catalog Connections\n",
     "\n",
-    "- [wr.redshift.connect()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.redshift.connect.html)\n",
-    "- [wr.mysql.connect()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.mysql.connect.html)\n",
-    "- [wr.postgresql.connect()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.postgresql.connect.html)\n",
-    "- [wr.sqlserver.connect()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.sqlserver.connect.html)"
+    "- [wr.redshift.connect()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.redshift.connect.html)\n",
+    "- [wr.mysql.connect()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.mysql.connect.html)\n",
+    "- [wr.postgresql.connect()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.postgresql.connect.html)\n",
+    "- [wr.sqlserver.connect()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.sqlserver.connect.html)"
   ]
  },
 {
diff --git a/tutorials/014 - Schema Evolution.ipynb b/tutorials/014 - Schema Evolution.ipynb
index ab4661d86..ec0c6628c 100644
--- a/tutorials/014 - Schema Evolution.ipynb
+++ b/tutorials/014 - Schema Evolution.ipynb
@@ -10,9 +10,9 @@
     "\n",
     "Wrangler support new **columns** on Parquet and CSV datasets through:\n",
     "\n",
-    "- [wr.s3.to_parquet()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet)\n",
-    "- [wr.s3.store_parquet_metadata()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.s3.store_parquet_metadata.html#awswrangler.s3.store_parquet_metadata) i.e. \"Crawler\"\n",
-    "- [wr.s3.to_csv()](https://aws-data-wrangler.readthedocs.io/en/2.15.0/stubs/awswrangler.s3.to_csv.html#awswrangler.s3.to_csv)"
+    "- [wr.s3.to_parquet()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet)\n",
+    "- [wr.s3.store_parquet_metadata()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.s3.store_parquet_metadata.html#awswrangler.s3.store_parquet_metadata) i.e. \"Crawler\"\n",
+    "- [wr.s3.to_csv()](https://aws-data-wrangler.readthedocs.io/en/2.15.1/stubs/awswrangler.s3.to_csv.html#awswrangler.s3.to_csv)"
   ]
  },
 {
diff --git a/tutorials/021 - Global Configurations.ipynb b/tutorials/021 - Global Configurations.ipynb
index 4502facaf..352a3b5ce 100644
--- a/tutorials/021 - Global Configurations.ipynb
+++ b/tutorials/021 - Global Configurations.ipynb
@@ -13,7 +13,7 @@
     "- **Environment variables**\n",
     "- **wr.config**\n",
     "\n",
-    "*P.S. Check the [function API doc](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html) to see if your function has some argument that can be configured through Global configurations.*\n",
+    "*P.S. Check the [function API doc](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html) to see if your function has some argument that can be configured through Global configurations.*\n",
     "\n",
     "*P.P.S. One exception to the above mentioned rules is the `botocore_config` property. It cannot be set through environment variables\n",
     "but only via `wr.config`. It will be used as the `botocore.config.Config` for all underlying `boto3` calls.\n",
diff --git a/tutorials/022 - Writing Partitions Concurrently.ipynb b/tutorials/022 - Writing Partitions Concurrently.ipynb
index 87964c827..8ed003674 100644
--- a/tutorials/022 - Writing Partitions Concurrently.ipynb
+++ b/tutorials/022 - Writing Partitions Concurrently.ipynb
@@ -13,7 +13,7 @@
     "    If True will increase the parallelism level during the partitions writing. It will decrease the\n",
     "    writing time and increase the memory usage.\n",
     "\n",
-    "*P.S. Check the [function API doc](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html) to see it has some argument that can be configured through Global configurations.*"
+    "*P.S. Check the [function API doc](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html) to see it has some argument that can be configured through Global configurations.*"
   ]
  },
 {
diff --git a/tutorials/023 - Flexible Partitions Filter.ipynb b/tutorials/023 - Flexible Partitions Filter.ipynb
index 9e34a1126..8e19ae11e 100644
--- a/tutorials/023 - Flexible Partitions Filter.ipynb
+++ b/tutorials/023 - Flexible Partitions Filter.ipynb
@@ -16,7 +16,7 @@
     "    - Ignored if `dataset=False`.\n",
     "  \n",
     "\n",
-    "*P.S. Check the [function API doc](https://aws-data-wrangler.readthedocs.io/en/2.15.0/api.html) to see it has some argument that can be configured through Global configurations.*"
+    "*P.S. Check the [function API doc](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html) to see it has some argument that can be configured through Global configurations.*"
   ]
  },
 {
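Several of the docstrings and the tutorial above lean on the ``partition_filter`` callable, so a short illustrative sketch of its contract may help: the callable receives each partition's keys and values as a dict of strings and returns a bool. The dataset path below is hypothetical:

    import awswrangler as wr

    # Partition values arrive as strings, e.g. {"year": "2020", "month": "1"};
    # return True to read that partition, False to skip it.
    df = wr.s3.read_parquet(
        path="s3://my-bucket/my-dataset/",  # hypothetical dataset root
        dataset=True,
        partition_filter=lambda x: x["year"] == "2020" and x["month"] == "1",
    )
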
From 89a3ba1aa86a5d88c27209aeb869e6ac75081ac5 Mon Sep 17 00:00:00 2001
From: Abdel Jaidi
Date: Wed, 6 Apr 2022 19:11:32 +0100
Subject: [PATCH 4/4] Missing docs

---
 awswrangler/redshift.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/awswrangler/redshift.py b/awswrangler/redshift.py
index f6f2af0d8..001dc933b 100644
--- a/awswrangler/redshift.py
+++ b/awswrangler/redshift.py
@@ -1297,6 +1297,8 @@ def copy_from_files(  # pylint: disable=too-many-locals,too-many-arguments
         Whether to commit the transaction. True by default.
     manifest: bool
         If set to true path argument accepts a S3 uri to a manifest file.
+    sql_copy_extra_params: Optional[List[str]]
+        Additional copy parameters to pass to the command. For example: ["STATUPDATE ON"]
     boto3_session : boto3.Session(), optional
         Boto3 Session. The default boto3 session will be used if boto3_session receive None.
     s3_additional_kwargs:
@@ -1408,10 +1410,10 @@ def copy(  # pylint: disable=too-many-arguments
     keep_files: bool = False,
     use_threads: Union[bool, int] = True,
     lock: bool = False,
+    sql_copy_extra_params: Optional[List[str]] = None,
     boto3_session: Optional[boto3.Session] = None,
     s3_additional_kwargs: Optional[Dict[str, str]] = None,
     max_rows_by_file: Optional[int] = 10_000_000,
-    sql_copy_extra_params: Optional[List[str]] = None,
 ) -> None:
     """Load Pandas DataFrame as a Table on Amazon Redshift using parquet files on S3 as stage.
@@ -1499,6 +1501,8 @@ def copy(  # pylint: disable=too-many-arguments
         If integer is provided, specified number is used.
     lock : bool
         True to execute LOCK command inside the transaction to force serializable isolation.
+    sql_copy_extra_params: Optional[List[str]]
+        Additional copy parameters to pass to the command. For example: ["STATUPDATE ON"]
     boto3_session : boto3.Session(), optional
         Boto3 Session. The default boto3 session will be used if boto3_session receive None.
     s3_additional_kwargs:
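
Putting the series together, the new ``sql_copy_extra_params`` argument forwards extra parameters to the generated COPY statement. A minimal, illustrative usage sketch follows; the Glue connection name, staging path, schema and table are hypothetical and not part of the patch, and "STATUPDATE ON" is the example value from the docstring above:

    import pandas as pd
    import awswrangler as wr

    df = pd.DataFrame({"col": [1, 2, 3]})

    # Hypothetical Glue Catalog connection holding the Redshift credentials.
    con = wr.redshift.connect("aws-data-wrangler-redshift")

    wr.redshift.copy(
        df=df,
        path="s3://my-bucket/stage/",  # hypothetical S3 staging prefix
        con=con,
        table="my_table",
        schema="public",
        sql_copy_extra_params=["STATUPDATE ON"],  # appended to the COPY command
    )
    con.close()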