From 93db6f742f9bb44512ed3a8f450000814e593ef0 Mon Sep 17 00:00:00 2001 From: Abdel Jaidi Date: Tue, 7 Mar 2023 11:52:45 +0000 Subject: [PATCH 1/4] chore: Clean up unit tests --- awswrangler/athena/_read.py | 48 +++++++------ tests/unit/test_athena.py | 11 +-- tests/unit/test_athena_csv.py | 64 ++++++----------- tests/unit/test_athena_parquet.py | 15 +--- tests/unit/test_catalog.py | 5 -- tests/unit/test_lakeformation.py | 9 --- tests/unit/test_redshift.py | 1 - tests/unit/test_routines.py | 2 - tests/unit/test_s3_parquet.py | 2 - tests/unit/test_s3_text_compressed.py | 98 ++++++++++----------------- 10 files changed, 83 insertions(+), 172 deletions(-) diff --git a/awswrangler/athena/_read.py b/awswrangler/athena/_read.py index 228f60387..1e5845bb2 100644 --- a/awswrangler/athena/_read.py +++ b/awswrangler/athena/_read.py @@ -1357,7 +1357,7 @@ def read_sql_table( - `Global Configurations `_ - **There are two approaches to be defined through ctas_approach parameter:** + **There are three approaches available through ctas_approach and unload_approach parameters:** **1** - ctas_approach=True (Default): @@ -1375,8 +1375,26 @@ - Does not support columns with repeated names. - Does not support columns with undefined data types. - A temporary table will be created and then deleted immediately. + - Does not support custom data_source/catalog_id. + + **2** - unload_approach=True and ctas_approach=False: + + Does an UNLOAD query on Athena and parses the Parquet result on S3. + + PROS: + + - Faster for mid and big result sizes. + - Can handle some level of nested types. + - Does not modify Glue Data Catalog. + + CONS: - **2** - ctas_approach=False: + - Output S3 path must be empty. + - Does not support timestamp with time zone. + - Does not support columns with repeated names. + - Does not support columns with undefined data types. + + **3** - ctas_approach=False: Does a regular query on Athena and parse the regular CSV result on s3. @@ -1385,6 +1403,7 @@ def read_sql_table( - Faster for small result sizes (less latency). - Does not require create/delete table permissions on Glue - Supports timestamp with time zone. + - Supports custom data_source/catalog_id. CONS: @@ -1423,16 +1442,15 @@ def read_sql_table( There are two batching strategies: - - If **chunksize=True**, depending on the size of the data, one or more data frames will be - returned per each file in the query result. - Unlike **chunksize=INTEGER**, rows from different files will not be mixed in the resulting data frames. + - If **chunksize=True**, depending on the size of the data, one or more data frames are returned per file in the query result. + Unlike **chunksize=INTEGER**, rows from different files are not mixed in the resulting data frames. - - If **chunksize=INTEGER**, awswrangler will iterate on the data by number of rows egual the received INTEGER. + - If **chunksize=INTEGER**, awswrangler iterates on the data by number of rows equal to the received INTEGER. `P.S.` `chunksize=True` is faster and uses less memory while `chunksize=INTEGER` is more precise - in number of rows for each Dataframe. + in number of rows for each data frame. - `P.P.S.` If `ctas_approach=False` and `chunksize=True`, you will always receive an interador with a + `P.P.S.` If `ctas_approach=False` and `chunksize=True`, you will always receive an iterator with a single DataFrame because regular Athena queries only produces a single output file. Note @@ -1473,20 +1491,6 @@ def read_sql_table( For SSE-KMS, this is the KMS key ARN or ID.
keep_files : bool Should awswrangler delete or keep the staging files produced by Athena? - ctas_database : str, optional - The name of the alternative database where the CTAS temporary table is stored. - If None, the default `database` is used. - ctas_temp_table_name : str, optional - The name of the temporary table and also the directory name on S3 where the CTAS result is stored. - If None, it will use the follow random pattern: `f"temp_table_{uuid.uuid4().hex}"`. - On S3 this directory will be under under the pattern: `f"{s3_output}/{ctas_temp_table_name}/"`. - ctas_bucketing_info: Tuple[List[str], int], optional - Tuple consisting of the column names used for bucketing as the first element and the number of buckets as the - second element. - Only `str`, `int` and `bool` are supported as column data types for bucketing. - ctas_write_compression: str, optional - Write compression for the temporary table where the CTAS result is stored. - Corresponds to the `write_compression` parameters for CREATE TABLE AS statement in Athena. use_threads : bool, int True to enable concurrent requests, False to disable multiple threads. If enabled os.cpu_count() will be used as the max number of threads. diff --git a/tests/unit/test_athena.py b/tests/unit/test_athena.py index 436341c1d..0c14e3bf0 100644 --- a/tests/unit/test_athena.py +++ b/tests/unit/test_athena.py @@ -1,6 +1,7 @@ import datetime import logging import string +import time from unittest.mock import patch import boto3 @@ -224,7 +225,6 @@ def test_athena_create_ctas(path, glue_table, glue_table2, glue_database, glue_c @pytest.mark.xfail(is_ray_modin, raises=AssertionError, reason="Index equality regression") def test_athena(path, glue_database, glue_table, kms_key, workgroup0, workgroup1): - wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) wr.s3.to_parquet( df=get_df(), path=path, @@ -301,7 +301,6 @@ def test_athena(path, glue_database, glue_table, kms_key, workgroup0, workgroup1 def test_read_sql_query_parameter_formatting_respects_prefixes(path, glue_database, glue_table, workgroup0): - wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) wr.s3.to_parquet( df=get_df(), path=path, @@ -329,7 +328,6 @@ def test_read_sql_query_parameter_formatting_respects_prefixes(path, glue_databa [("string", "Seattle"), ("date", datetime.date(2020, 1, 1)), ("bool", True), ("category", 1.0)], ) def test_read_sql_query_parameter_formatting(path, glue_database, glue_table, workgroup0, col_name, col_value): - wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) wr.s3.to_parquet( df=get_df(), path=path, @@ -354,7 +352,6 @@ def test_read_sql_query_parameter_formatting(path, glue_database, glue_table, wo @pytest.mark.parametrize("col_name", [("string"), ("date"), ("bool"), ("category")]) def test_read_sql_query_parameter_formatting_null(path, glue_database, glue_table, workgroup0, col_name): - wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) wr.s3.to_parquet( df=get_df(), path=path, @@ -381,6 +378,7 @@ def test_athena_query_cancelled(glue_database): query_execution_id = wr.athena.start_query_execution( sql="SELECT " + "rand(), " * 10000 + "rand()", database=glue_database ) + time.sleep(2) # On a cold start, Athena might not have started the query wr.athena.stop_query_execution(query_execution_id=query_execution_id) with pytest.raises(wr.exceptions.QueryCancelled): assert wr.athena.wait_query(query_execution_id=query_execution_id) @@ -551,7 +549,6 @@ def 
test_category(path, glue_table, glue_database): ) for df2 in dfs: ensure_data_types_category(df2) - assert wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) is True @pytest.mark.parametrize("workgroup", [None, 0, 1, 2, 3]) @@ -784,7 +781,6 @@ def test_bucketing_catalog_parquet_table(path, glue_database, glue_table): table = next(wr.catalog.get_tables(name_contains=glue_table)) assert table["StorageDescriptor"]["NumberOfBuckets"] == nb_of_buckets assert table["StorageDescriptor"]["BucketColumns"] == bucket_cols - assert wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) @pytest.mark.parametrize("bucketing_data", [[0, 1, 2], [False, True, False], ["b", "c", "d"]]) @@ -873,7 +869,6 @@ def test_bucketing_catalog_csv_table(path, glue_database, glue_table): table = next(wr.catalog.get_tables(name_contains=glue_table)) assert table["StorageDescriptor"]["NumberOfBuckets"] == nb_of_buckets assert table["StorageDescriptor"]["BucketColumns"] == bucket_cols - assert wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) @pytest.mark.parametrize("bucketing_data", [[0, 1, 2], [False, True, False], ["b", "c", "d"]]) @@ -1196,7 +1191,6 @@ def test_bucketing_combined_csv_saving(path, glue_database, glue_table): def test_start_query_execution_wait(path, glue_database, glue_table): - wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) wr.s3.to_parquet( df=get_df(), path=path, @@ -1358,7 +1352,6 @@ def test_get_query_execution(workgroup0, workgroup1): @pytest.mark.parametrize("compression", [None, "snappy", "gzip"]) def test_read_sql_query_ctas_write_compression(path, glue_database, glue_table, compression): - wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) wr.s3.to_parquet( df=get_df(), path=path, diff --git a/tests/unit/test_athena_csv.py b/tests/unit/test_athena_csv.py index 3e4661d62..15c91c94b 100644 --- a/tests/unit/test_athena_csv.py +++ b/tests/unit/test_athena_csv.py @@ -1,6 +1,5 @@ import csv import logging -from sys import version_info import boto3 import pyarrow as pa @@ -290,7 +289,6 @@ def test_csv_catalog(path, glue_table, glue_database, use_threads, concurrent_pa assert len(df2.columns) == 11 assert df2["id"].sum() == 6 ensure_data_types_csv(df2) - assert wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) is True @pytest.mark.parametrize("use_threads", [True, False]) @@ -377,7 +375,6 @@ def test_athena_csv_types(path, glue_database, glue_table): assert len(df2.columns) == 10 assert df2["id"].sum() == 6 ensure_data_types_csv(df2) - assert wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) is True @pytest.mark.parametrize("use_threads", [True, False]) @@ -457,47 +454,26 @@ def test_failing_catalog(path, glue_table, use_threads): @pytest.mark.parametrize("concurrent_partitioning", [True, False]) @pytest.mark.parametrize("compression", ["gzip", "bz2", None]) def test_csv_compressed(path, glue_table, glue_database, use_threads, concurrent_partitioning, compression): - df = get_df_csv() - if version_info < (3, 7) and compression: - with pytest.raises(wr.exceptions.InvalidArgument): - wr.s3.to_csv( - df=df, - path=path, - sep="\t", - index=True, - use_threads=use_threads, - boto3_session=None, - s3_additional_kwargs=None, - dataset=True, - partition_cols=["par0", "par1"], - mode="overwrite", - table=glue_table, - database=glue_database, - concurrent_partitioning=concurrent_partitioning, - compression=compression, - ) - else: - 
wr.s3.to_csv( - df=df, - path=path, - sep="\t", - index=True, - use_threads=use_threads, - boto3_session=None, - s3_additional_kwargs=None, - dataset=True, - partition_cols=["par0", "par1"], - mode="overwrite", - table=glue_table, - database=glue_database, - concurrent_partitioning=concurrent_partitioning, - compression=compression, - ) - df2 = wr.athena.read_sql_table(glue_table, glue_database) - assert df2.shape == (3, 11) - assert df2["id"].sum() == 6 - ensure_data_types_csv(df2) - assert wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) is True + wr.s3.to_csv( + df=get_df_csv(), + path=path, + sep="\t", + index=True, + use_threads=use_threads, + boto3_session=None, + s3_additional_kwargs=None, + dataset=True, + partition_cols=["par0", "par1"], + mode="overwrite", + table=glue_table, + database=glue_database, + concurrent_partitioning=concurrent_partitioning, + compression=compression, + ) + df2 = wr.athena.read_sql_table(glue_table, glue_database) + assert df2.shape == (3, 11) + assert df2["id"].sum() == 6 + ensure_data_types_csv(df2) @pytest.mark.parametrize("use_threads", [True, False]) diff --git a/tests/unit/test_athena_parquet.py b/tests/unit/test_athena_parquet.py index 138aedb13..4302f2407 100644 --- a/tests/unit/test_athena_parquet.py +++ b/tests/unit/test_athena_parquet.py @@ -74,8 +74,6 @@ def test_parquet_catalog(path, path2, glue_table, glue_table2, glue_database): assert len(columns_types) == 18 assert len(partitions_types) == 2 assert len(partitions_values) == 2 - assert wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) is True - assert wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table2) is True @pytest.mark.parametrize("use_threads", [True, False]) @@ -163,8 +161,6 @@ def test_parquet_catalog_casting(path, glue_database, glue_table): df = wr.athena.read_sql_table(table=glue_table, database=glue_database, ctas_approach=False) assert df.shape == (3, 16) ensure_data_types(df=df, has_list=False) - wr.s3.delete_objects(path=path) - assert wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) is True def test_parquet_catalog_casting_to_string_with_null(path, glue_table, glue_database): @@ -208,8 +204,6 @@ def test_parquet_compress(path, glue_table, glue_database, compression): df2 = wr.athena.read_sql_table(glue_table, glue_database) ensure_data_types(df2) df2 = wr.s3.read_parquet(path=path) - wr.s3.delete_objects(path=path) - assert wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) is True ensure_data_types(df2) @@ -242,7 +236,6 @@ def test_parquet_char_length(path, glue_database, glue_table): @pytest.mark.parametrize("col2", [[1, 1, 1, 1, 1], [1, 2, 3, 4, 5], [1, 1, 1, 1, 2], [1, 2, 2, 2, 2]]) @pytest.mark.parametrize("chunked", [True, 1, 2, 100]) def test_parquet_chunked(path, glue_database, glue_table, col2, chunked): - wr.s3.delete_objects(path=path) values = list(range(5)) df = pd.DataFrame({"col1": values, "col2": col2}) wr.s3.to_parquet( @@ -274,12 +267,9 @@ def test_parquet_chunked(path, glue_database, glue_table, col2, chunked): assert chunked == len(df2) assert chunked >= len(dfs[-1]) - assert wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) is True - -@pytest.mark.xfail(is_ray_modin, raises=AssertionError, reason="Issue since upgrading to Ray 2.3") +@pytest.mark.xfail(is_ray_modin, raises=AssertionError, reason="Issue since upgrading to PyArrow 11.0") def test_unsigned_parquet(path, glue_database, glue_table): - 
wr.s3.delete_objects(path=path) df = pd.DataFrame({"c0": [0, 0, (2**8) - 1], "c1": [0, 0, (2**16) - 1], "c2": [0, 0, (2**32) - 1]}) df["c0"] = df.c0.astype("uint8") df["c1"] = df.c1.astype("uint16") @@ -317,9 +307,6 @@ def test_unsigned_parquet(path, glue_database, glue_table): mode="overwrite", ) - wr.s3.delete_objects(path=path) - wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) - def test_parquet_overwrite_partition_cols(path, glue_database, glue_table): df = pd.DataFrame({"c0": [1, 2, 1, 2], "c1": [1, 2, 1, 2], "c2": [2, 1, 2, 1]}) diff --git a/tests/unit/test_catalog.py b/tests/unit/test_catalog.py index 4a867be4b..0e7fb14b9 100644 --- a/tests/unit/test_catalog.py +++ b/tests/unit/test_catalog.py @@ -246,9 +246,6 @@ def test_catalog_get_databases(glue_database: str) -> None: def test_catalog_versioning(path: str, glue_database: str, glue_table: str, glue_table2: str) -> None: - wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) - wr.s3.delete_objects(path=path) - # Version 1 - Parquet df = pd.DataFrame({"c0": [1, 2]}) wr.s3.to_parquet(df=df, path=path, dataset=True, database=glue_database, table=glue_table, mode="overwrite")[ @@ -429,8 +426,6 @@ def test_catalog_columns(path: str, glue_table: str, glue_database: str) -> None assert df2["id"].sum() == 9 ensure_data_types_csv(df2) - assert wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) is True - @pytest.mark.parametrize("use_catalog_id", [False, True]) def test_create_database(random_glue_database: str, account_id: str, use_catalog_id: bool) -> None: diff --git a/tests/unit/test_lakeformation.py b/tests/unit/test_lakeformation.py index 28a59df9e..ffd7e04b8 100644 --- a/tests/unit/test_lakeformation.py +++ b/tests/unit/test_lakeformation.py @@ -22,9 +22,6 @@ def test_lakeformation(path, path2, glue_database, glue_table, glue_table2, use_threads=False): - wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) - wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table2) - wr.s3.to_parquet( df=get_df(governed=True), path=path, @@ -97,9 +94,6 @@ def test_lakeformation(path, path2, glue_database, glue_table, glue_table2, use_ def test_lakeformation_multi_transaction(path, path2, glue_database, glue_table, glue_table2, use_threads=True): - wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) - wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table2) - df = pd.DataFrame({"c0": [0, None]}, dtype="Int64") transaction_id = wr.lakeformation.start_transaction(read_only=False) wr.s3.to_parquet( @@ -168,9 +162,6 @@ def test_lakeformation_multi_transaction(path, path2, glue_database, glue_table, ], ) def test_lakeformation_partiql_formatting(path, path2, glue_database, glue_table, glue_table2, col_name, col_value): - wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) - wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table2) - wr.s3.to_parquet( df=get_df_list(governed=True), path=path, diff --git a/tests/unit/test_redshift.py b/tests/unit/test_redshift.py index e19e02e7c..ba6eb492e 100644 --- a/tests/unit/test_redshift.py +++ b/tests/unit/test_redshift.py @@ -322,7 +322,6 @@ def test_spectrum(path, redshift_table, redshift_con, glue_database, redshift_ex assert len(rows) == len(df.index) for row in rows: assert len(row) == len(df.columns) - wr.catalog.delete_table_if_exists(database=glue_database, table=redshift_table) 
@pytest.mark.xfail(raises=NotImplementedError, reason="Unable to create pandas categorical from pyarrow table") diff --git a/tests/unit/test_routines.py b/tests/unit/test_routines.py index 436ede6e6..054759f9c 100644 --- a/tests/unit/test_routines.py +++ b/tests/unit/test_routines.py @@ -479,5 +479,3 @@ def test_routine_2(glue_database, glue_table, path): assert comments["c0"] == "zero" assert comments["c1"] == "one" assert comments["c2"] == "two" - - wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table) diff --git a/tests/unit/test_s3_parquet.py b/tests/unit/test_s3_parquet.py index 4e158f3ad..1569b763e 100644 --- a/tests/unit/test_s3_parquet.py +++ b/tests/unit/test_s3_parquet.py @@ -222,7 +222,6 @@ def test_parquet_validate_schema(path): def test_parquet_uint64(path): - wr.s3.delete_objects(path=path) df = pd.DataFrame( { "c0": [0, 0, (2**8) - 1], @@ -245,7 +244,6 @@ def test_parquet_uint64(path): assert df.c2.max() == (2**32) - 1 assert df.c3.max() == (2**64) - 1 assert df.c4.astype("uint8").sum() == 3 - wr.s3.delete_objects(path=path) def test_parquet_metadata_partitions(path): diff --git a/tests/unit/test_s3_text_compressed.py b/tests/unit/test_s3_text_compressed.py index 15117755a..2c461a6af 100644 --- a/tests/unit/test_s3_text_compressed.py +++ b/tests/unit/test_s3_text_compressed.py @@ -3,7 +3,6 @@ import logging import lzma from io import BytesIO, TextIOWrapper -from sys import version_info from typing import Optional import boto3 @@ -99,40 +98,28 @@ def test_csv_write(path: str, compression: Optional[str]) -> None: path_file = f"{path}test.csv{EXT.get(compression, '')}" df = get_df_csv() - if version_info < (3, 7) and compression: - with pytest.raises(wr.exceptions.InvalidArgument): - wr.s3.to_csv(df, path_file, compression=compression, index=False, header=None) - else: - wr.s3.to_csv(df, path_file, compression=compression, index=False, header=None) - df2 = pd.read_csv(path_file, names=df.columns) - df3 = wr.s3.read_csv([path_file], names=df.columns) - assert df.shape == df2.shape == df3.shape + wr.s3.to_csv(df, path_file, compression=compression, index=False, header=None) + df2 = pd.read_csv(path_file, names=df.columns) + df3 = wr.s3.read_csv([path_file], names=df.columns) + assert df.shape == df2.shape == df3.shape @pytest.mark.parametrize("compression", ["gzip", "bz2", "xz", "zip", None]) def test_csv_write_dataset_filename_extension(path: str, compression: Optional[str]) -> None: df = get_df_csv() - if version_info < (3, 7) and compression: - with pytest.raises(wr.exceptions.InvalidArgument): - wr.s3.to_csv(df, path, compression=compression, index=False, dataset=True) - else: - result = wr.s3.to_csv(df, path, compression=compression, index=False, dataset=True) - for p in result["paths"]: - assert p.endswith(f".csv{EXT.get(compression, '')}") + result = wr.s3.to_csv(df, path, compression=compression, index=False, dataset=True) + for p in result["paths"]: + assert p.endswith(f".csv{EXT.get(compression, '')}") @pytest.mark.parametrize("compression", ["gzip", "bz2", "xz", "zip", None]) def test_json(path: str, compression: Optional[str]) -> None: path_file = f"{path}test.json{EXT.get(compression, '')}" df = pd.DataFrame({"id": [1, 2, 3]}) - if version_info < (3, 7) and compression: - with pytest.raises(wr.exceptions.InvalidArgument): - wr.s3.to_json(df=df, path=path_file, compression=compression) - else: - wr.s3.to_json(df=df, path=path_file) - df2 = pd.read_json(path_file, compression=compression) - df3 = wr.s3.read_json(path=[path_file]) - assert df.shape 
== df2.shape == df3.shape + wr.s3.to_json(df=df, path=path_file) + df2 = pd.read_json(path_file, compression=compression) + df3 = wr.s3.read_json(path=[path_file]) + assert df.shape == df2.shape == df3.shape @pytest.mark.parametrize("chunksize", [None, 1]) @@ -146,34 +133,22 @@ def test_partitioned_json(path: str, compression: Optional[str], chunksize: Opti "month": [1, 2, 1, 2], } ) - if version_info < (3, 7) and compression: - with pytest.raises(wr.exceptions.InvalidArgument): - wr.s3.to_json( - df, - path=path, - orient="records", - lines=True, - compression=compression, - dataset=True, - partition_cols=["year", "month"], - ) + wr.s3.to_json( + df, + path=path, + orient="records", + lines=True, + compression=compression, + dataset=True, + partition_cols=["year", "month"], + ) + df2 = wr.s3.read_json(path, dataset=True, chunksize=chunksize) + if chunksize is None: + assert df2.shape == (4, 4) + assert df2.c0.sum() == 6 else: - wr.s3.to_json( - df, - path=path, - orient="records", - lines=True, - compression=compression, - dataset=True, - partition_cols=["year", "month"], - ) - df2 = wr.s3.read_json(path, dataset=True, chunksize=chunksize) - if chunksize is None: - assert df2.shape == (4, 4) - assert df2.c0.sum() == 6 - else: - for d in df2: - assert d.shape == (1, 4) + for d in df2: + assert d.shape == (1, 4) @pytest.mark.parametrize("chunksize", [None, 1]) @@ -181,17 +156,12 @@ def test_partitioned_json(path: str, compression: Optional[str], chunksize: Opti def test_partitioned_csv(path: str, compression: Optional[str], chunksize: Optional[int]) -> None: df = pd.DataFrame({"c0": [0, 1], "c1": ["foo", "boo"]}) paths = [f"{path}year={y}/month={m}/0.csv{EXT.get(compression, '')}" for y, m in [(2020, 1), (2020, 2), (2021, 1)]] - if version_info < (3, 7) and compression: - with pytest.raises(wr.exceptions.InvalidArgument): - for p in paths: - wr.s3.to_csv(df, p, index=False, compression=compression) + for p in paths: + wr.s3.to_csv(df, p, index=False, compression=compression, header=True) + df2 = wr.s3.read_csv(path, dataset=True, chunksize=chunksize, header=0) + if chunksize is None: + assert df2.shape == (6, 4) + assert df2.c0.sum() == 3 else: - for p in paths: - wr.s3.to_csv(df, p, index=False, compression=compression, header=True) - df2 = wr.s3.read_csv(path, dataset=True, chunksize=chunksize, header=0) - if chunksize is None: - assert df2.shape == (6, 4) - assert df2.c0.sum() == 3 - else: - for d in df2: - assert d.shape == (1, 4) + for d in df2: + assert d.shape == (1, 4) From 39ba26f548164ff36661604c794dc72fec8f533b Mon Sep 17 00:00:00 2001 From: Abdel Jaidi Date: Tue, 7 Mar 2023 14:26:05 +0000 Subject: [PATCH 2/4] xfail athena test --- tests/unit/test_athena.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_athena.py b/tests/unit/test_athena.py index 0c14e3bf0..e36d986a3 100644 --- a/tests/unit/test_athena.py +++ b/tests/unit/test_athena.py @@ -374,11 +374,11 @@ def test_read_sql_query_parameter_formatting_null(path, glue_database, glue_tabl assert len(df.index) == 1 +@pytest.mark.xfail(raises=botocore.exceptions.InvalidRequestException, reason="QueryId not found.") def test_athena_query_cancelled(glue_database): query_execution_id = wr.athena.start_query_execution( sql="SELECT " + "rand(), " * 10000 + "rand()", database=glue_database ) - time.sleep(2) # On a cold start, Athena might not have started the query wr.athena.stop_query_execution(query_execution_id=query_execution_id) with pytest.raises(wr.exceptions.QueryCancelled): assert 
wr.athena.wait_query(query_execution_id=query_execution_id) From 29d845c848ccee43f8e40d0ac966c2dfffc658fc Mon Sep 17 00:00:00 2001 From: Abdel Jaidi Date: Tue, 7 Mar 2023 15:16:24 +0000 Subject: [PATCH 3/4] Fix verbose warnings --- .../distributed/ray/modin/s3/_write_text.py | 2 +- tests/load/test_s3.py | 26 +++++++------------ tests/unit/test_athena.py | 3 +-- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/awswrangler/distributed/ray/modin/s3/_write_text.py b/awswrangler/distributed/ray/modin/s3/_write_text.py index 451821ee4..5593cc5ae 100644 --- a/awswrangler/distributed/ray/modin/s3/_write_text.py +++ b/awswrangler/distributed/ray/modin/s3/_write_text.py @@ -101,7 +101,7 @@ def _to_text_distributed( # pylint: disable=unused-argument # Create Ray Dataset ds = _ray_dataset_from_df(df) - # Repartition into a single block if or writing into a single key or if bucketing is enabled + # Repartition into a single block if writing into a single key or if bucketing is enabled if ds.count() > 0 and path: ds = ds.repartition(1) _logger.warning( diff --git a/tests/load/test_s3.py b/tests/load/test_s3.py index d2d883bf9..1d39e8a4e 100644 --- a/tests/load/test_s3.py +++ b/tests/load/test_s3.py @@ -20,7 +20,6 @@ def _modin_repartition(df: pd.DataFrame, num_blocks: int) -> pd.DataFrame: return dataset.to_modin() -@pytest.mark.repeat(1) @pytest.mark.parametrize("benchmark_time", [150]) def test_s3_select(benchmark_time: float, request: pytest.FixtureRequest) -> None: paths = [f"s3://ursa-labs-taxi-data/2018/{i}/data.parquet" for i in range(10, 13)] @@ -96,7 +95,7 @@ def test_s3_read_parquet_many_files( assert timer.elapsed_time < benchmark_time -@pytest.mark.parametrize("benchmark_time", [30]) +@pytest.mark.parametrize("benchmark_time", [40]) def test_s3_read_parquet_partition_filter(benchmark_time: float, request: pytest.FixtureRequest) -> None: path = "s3://amazon-reviews-pds/parquet/" with ExecutionTimer(request, data_paths=path) as timer: @@ -167,18 +166,15 @@ def test_s3_write_parquet_blocks( @pytest.mark.parametrize("benchmark_time", [5]) def test_s3_delete_objects(path: str, path2: str, benchmark_time: float, request: pytest.FixtureRequest) -> None: - df = pd.DataFrame({"id": [1, 2, 3]}) - objects_per_bucket = 505 - paths1 = [f"{path}delete-test{i}.json" for i in range(objects_per_bucket)] - paths2 = [f"{path2}delete-test{i}.json" for i in range(objects_per_bucket)] + df = pd.DataFrame({"id": range(0, 505)}) + paths1 = wr.s3.to_parquet(df=df, path=path, max_rows_by_file=1)["paths"] + paths2 = wr.s3.to_parquet(df=df, path=path2, max_rows_by_file=1)["paths"] paths = paths1 + paths2 - for path in paths: - wr.s3.to_csv(df, path) with ExecutionTimer(request) as timer: wr.s3.delete_objects(path=paths) assert timer.elapsed_time < benchmark_time - assert len(wr.s3.list_objects(f"{path}delete-test*")) == 0 - assert len(wr.s3.list_objects(f"{path2}delete-test*")) == 0 + assert len(wr.s3.list_objects(path)) == 0 + assert len(wr.s3.list_objects(path2)) == 0 @pytest.mark.parametrize("benchmark_time", [20]) @@ -226,16 +222,12 @@ def test_s3_write_json( @pytest.mark.timeout(300) @pytest.mark.parametrize("benchmark_time", [15]) def test_wait_object_exists(path: str, benchmark_time: int, request: pytest.FixtureRequest) -> None: - df = pd.DataFrame({"c0": [0, 1, 2], "c1": [3, 4, 5]}) - - num_objects = 200 - file_paths = [f"{path}{i}.txt" for i in range(num_objects)] + df = pd.DataFrame({"c0": range(0, 4)}) - for file_path in file_paths: - wr.s3.to_csv(df, file_path, index=True) + paths = 
wr.s3.to_parquet(df=df, path=path, max_rows_by_file=1)["paths"] with ExecutionTimer(request) as timer: - wr.s3.wait_objects_exist(file_paths, parallelism=16) + wr.s3.wait_objects_exist(paths, parallelism=16) assert timer.elapsed_time < benchmark_time diff --git a/tests/unit/test_athena.py b/tests/unit/test_athena.py index e36d986a3..d6f3fda83 100644 --- a/tests/unit/test_athena.py +++ b/tests/unit/test_athena.py @@ -1,7 +1,6 @@ import datetime import logging import string -import time from unittest.mock import patch import boto3 @@ -374,7 +373,7 @@ def test_read_sql_query_parameter_formatting_null(path, glue_database, glue_tabl assert len(df.index) == 1 -@pytest.mark.xfail(raises=botocore.exceptions.InvalidRequestException, reason="QueryId not found.") +@pytest.mark.xfail(raises=botocore.exceptions.ClientError, reason="QueryId not found.") def test_athena_query_cancelled(glue_database): query_execution_id = wr.athena.start_query_execution( sql="SELECT " + "rand(), " * 10000 + "rand()", database=glue_database From 6b419d0ee3cc26b199f21656a0a6dc318bafb43c Mon Sep 17 00:00:00 2001 From: Abdel Jaidi Date: Tue, 7 Mar 2023 16:26:10 +0000 Subject: [PATCH 4/4] Minor - 200 instead of 4 --- tests/load/test_s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load/test_s3.py b/tests/load/test_s3.py index 1d39e8a4e..462bcce62 100644 --- a/tests/load/test_s3.py +++ b/tests/load/test_s3.py @@ -222,7 +222,7 @@ def test_s3_write_json( @pytest.mark.timeout(300) @pytest.mark.parametrize("benchmark_time", [15]) def test_wait_object_exists(path: str, benchmark_time: int, request: pytest.FixtureRequest) -> None: - df = pd.DataFrame({"c0": range(0, 4)}) + df = pd.DataFrame({"c0": range(0, 200)}) paths = wr.s3.to_parquet(df=df, path=path, max_rows_by_file=1)["paths"]
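As a quick illustration of the three read strategies and the two chunksize batching modes documented in PATCH 1/4, here is a minimal sketch of how a caller would exercise them; the table, database, and bucket names are placeholders rather than anything referenced by the patches:

import awswrangler as wr

# 1) Default CTAS approach: wraps the query in a CTAS, reads the Parquet result,
#    and drops the temporary Glue table immediately afterwards.
df = wr.athena.read_sql_table(table="my_table", database="my_db", ctas_approach=True)

# 2) UNLOAD approach: does not modify the Glue Data Catalog; the output S3 path must be empty.
df = wr.athena.read_sql_table(
    table="my_table",
    database="my_db",
    ctas_approach=False,
    unload_approach=True,
    s3_output="s3://my-bucket/empty-prefix/",  # placeholder bucket/prefix
)

# 3) Plain query: slower for large results, but supports timestamp with time zone
#    and a custom data_source/catalog_id.
df = wr.athena.read_sql_table(table="my_table", database="my_db", ctas_approach=False)

# chunksize=True yields one or more DataFrames per result file, while chunksize=<int>
# iterates over the result in chunks of that many rows.
for chunk in wr.athena.read_sql_table(table="my_table", database="my_db", chunksize=True):
    print(len(chunk))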