diff --git a/.github/workflows/static-checking.yml b/.github/workflows/static-checking.yml index 76291af88..11133f8da 100644 --- a/.github/workflows/static-checking.yml +++ b/.github/workflows/static-checking.yml @@ -36,3 +36,7 @@ jobs: run: flake8 setup.py awswrangler testing/test_awswrangler - name: Pylint Lint run: pylint -j 0 awswrangler + - name: Black style + run: black --check --line-length 120 --target-version py36 awswrangler testing/test_awswrangler + - name: Imports order check (isort) + run: isort -rc --check-only awswrangler testing/test_awswrangler diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 000000000..f8e61f2f3 --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,6 @@ +[settings] +multi_line_output=3 +include_trailing_comma=True +force_grid_wrap=0 +use_parentheses=True +line_length=120 diff --git a/README.md b/README.md index 0f01a2d7c..cabd20ea3 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ ![AWS Data Wrangler](docs/source/_static/logo2.png?raw=true "AWS Data Wrangler") -[![Release](https://img.shields.io/badge/release-1.3.0-brightgreen.svg)](https://pypi.org/project/awswrangler/) +[![Release](https://img.shields.io/badge/release-1.4.0-brightgreen.svg)](https://pypi.org/project/awswrangler/) [![Python Version](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8-brightgreen.svg)](https://anaconda.org/conda-forge/awswrangler) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) @@ -63,23 +63,23 @@ df = wr.db.read_sql_query("SELECT * FROM external_schema.my_table", con=engine) - [EMR](https://aws-data-wrangler.readthedocs.io/en/latest/install.html#emr) - [From source](https://aws-data-wrangler.readthedocs.io/en/latest/install.html#from-source) - [**Tutorials**](https://github.com/awslabs/aws-data-wrangler/tree/master/tutorials) - - [01 - Introduction](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/01%20-%20Introduction.ipynb) - - [02 - Sessions](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/02%20-%20Sessions.ipynb) - - [03 - Amazon S3](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/03%20-%20Amazon%20S3.ipynb) - - [04 - Parquet Datasets](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/04%20-%20Parquet%20Datasets.ipynb) - - [05 - Glue Catalog](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/05%20-%20Glue%20Catalog.ipynb) - - [06 - Amazon Athena](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/06%20-%20Amazon%20Athena.ipynb) - - [07 - Databases (Redshift, MySQL and PostgreSQL)](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/07%20-%20Redshift%2C%20MySQL%2C%20PostgreSQL.ipynb) - - [08 - Redshift - Copy & Unload.ipynb](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/08%20-%20Redshift%20-%20Copy%20%26%20Unload.ipynb) - - [09 - Redshift - Append, Overwrite and Upsert](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/09%20-%20Redshift%20-%20Append%2C%20Overwrite%2C%20Upsert.ipynb) - - [10 - Parquet Crawler](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/10%20-%20Parquet%20Crawler.ipynb) - - [11 - CSV Datasets](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/11%20-%20CSV%20Datasets.ipynb) - - [12 - CSV 
Crawler](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/12%20-%20CSV%20Crawler.ipynb) - - [13 - Merging Datasets on S3](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/13%20-%20Merging%20Datasets%20on%20S3.ipynb) - - [14 - Schema Evolution](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/14%20-%20Schema%20Evolution.ipynb) - - [15 - EMR](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/15%20-%20EMR.ipynb) - - [16 - EMR & Docker](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/16%20-%20EMR%20%26%20Docker.ipynb) - - [17 - Partition Projection](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/17%20-%20Partition%20Projection.ipynb) + - [001 - Introduction](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/001%20-%20Introduction.ipynb) + - [002 - Sessions](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/002%20-%20Sessions.ipynb) + - [003 - Amazon S3](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/003%20-%20Amazon%20S3.ipynb) + - [004 - Parquet Datasets](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/004%20-%20Parquet%20Datasets.ipynb) + - [005 - Glue Catalog](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/005%20-%20Glue%20Catalog.ipynb) + - [006 - Amazon Athena](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/006%20-%20Amazon%20Athena.ipynb) + - [007 - Databases (Redshift, MySQL and PostgreSQL)](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/007%20-%20Redshift%2C%20MySQL%2C%20PostgreSQL.ipynb) + - [008 - Redshift - Copy & Unload.ipynb](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/008%20-%20Redshift%20-%20Copy%20%26%20Unload.ipynb) + - [009 - Redshift - Append, Overwrite and Upsert](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/009%20-%20Redshift%20-%20Append%2C%20Overwrite%2C%20Upsert.ipynb) + - [010 - Parquet Crawler](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/010%20-%20Parquet%20Crawler.ipynb) + - [011 - CSV Datasets](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/011%20-%20CSV%20Datasets.ipynb) + - [012 - CSV Crawler](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/012%20-%20CSV%20Crawler.ipynb) + - [013 - Merging Datasets on S3](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/013%20-%20Merging%20Datasets%20on%20S3.ipynb) + - [014 - Schema Evolution](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/014%20-%20Schema%20Evolution.ipynb) + - [015 - EMR](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/015%20-%20EMR.ipynb) + - [016 - EMR & Docker](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/016%20-%20EMR%20%26%20Docker.ipynb) + - [017 - Partition Projection](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/017%20-%20Partition%20Projection.ipynb) - [**API Reference**](https://aws-data-wrangler.readthedocs.io/en/latest/api.html) - [Amazon S3](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#amazon-s3) - [AWS Glue Catalog](https://aws-data-wrangler.readthedocs.io/en/latest/api.html#aws-glue-catalog) diff --git a/awswrangler/__metadata__.py b/awswrangler/__metadata__.py index 26d9ff44e..dc3dcb059 100644 --- a/awswrangler/__metadata__.py +++ b/awswrangler/__metadata__.py @@ -7,5 +7,5 @@ __title__ = "awswrangler" __description__ = "Pandas on AWS." 
-__version__ = "1.3.0" +__version__ = "1.4.0" __license__ = "Apache License 2.0" diff --git a/awswrangler/_utils.py b/awswrangler/_utils.py index b869d78c1..c399701f8 100644 --- a/awswrangler/_utils.py +++ b/awswrangler/_utils.py @@ -203,10 +203,10 @@ def get_region_from_session(boto3_session: Optional[boto3.Session] = None, defau ) # pragma: no cover -def extract_partitions_from_paths( +def extract_partitions_metadata_from_paths( path: str, paths: List[str] ) -> Tuple[Optional[Dict[str, str]], Optional[Dict[str, List[str]]]]: - """Extract partitions from Amazon S3 paths.""" + """Extract partitions metadata from Amazon S3 paths.""" path = path if path.endswith("/") else f"{path}/" partitions_types: Dict[str, str] = {} partitions_values: Dict[str, List[str]] = {} @@ -217,7 +217,7 @@ def extract_partitions_from_paths( ) # pragma: no cover path_wo_filename: str = p.rpartition("/")[0] + "/" if path_wo_filename not in partitions_values: - path_wo_prefix: str = p.replace(f"{path}/", "") + path_wo_prefix: str = path_wo_filename.replace(f"{path}/", "") dirs: List[str] = [x for x in path_wo_prefix.split("/") if (x != "") and ("=" in x)] if dirs: values_tups: List[Tuple[str, str]] = [tuple(x.split("=")[:2]) for x in dirs] # type: ignore @@ -238,6 +238,23 @@ def extract_partitions_from_paths( return partitions_types, partitions_values +def extract_partitions_from_path(path_root: str, path: str) -> Dict[str, Any]: + """Extract partitions values and names from Amazon S3 path.""" + path_root = path_root if path_root.endswith("/") else f"{path_root}/" + if path_root not in path: + raise exceptions.InvalidArgumentValue( + f"Object {path} is not under the root path ({path_root})." + ) # pragma: no cover + path_wo_filename: str = path.rpartition("/")[0] + "/" + path_wo_prefix: str = path_wo_filename.replace(f"{path_root}/", "") + dirs: List[str] = [x for x in path_wo_prefix.split("/") if (x != "") and ("=" in x)] + if not dirs: + return {} # pragma: no cover + values_tups: List[Tuple[str, str]] = [tuple(x.split("=")[:2]) for x in dirs] # type: ignore + values_dics: Dict[str, str] = dict(values_tups) + return values_dics + + def list_sampling(lst: List[Any], sampling: float) -> List[Any]: """Random List sampling.""" if sampling > 1.0 or sampling <= 0.0: # pragma: no cover diff --git a/awswrangler/s3.py b/awswrangler/s3.py index b13ccff63..d82df8567 100644 --- a/awswrangler/s3.py +++ b/awswrangler/s3.py @@ -677,6 +677,8 @@ def to_csv( # pylint: disable=too-many-arguments raise exceptions.InvalidArgumentCombination("Please, pass dataset=True to be able to use partition_cols.") if mode is not None: raise exceptions.InvalidArgumentCombination("Please pass dataset=True to be able to use mode.") + if columns_comments: + raise exceptions.InvalidArgumentCombination("Please pass dataset=True to be able to use columns_comments.") if any(arg is not None for arg in (database, table, description, parameters)): raise exceptions.InvalidArgumentCombination( "Please pass dataset=True to be able to use any one of these " @@ -887,14 +889,16 @@ def _to_text( raise exceptions.EmptyDataFrame() if fs is None: fs = _utils.get_fs(session=boto3_session, s3_additional_kwargs=s3_additional_kwargs) - with fs.open(path, "w") as f: + encoding: Optional[str] = pandas_kwargs.get("encoding", None) + newline: Optional[str] = pandas_kwargs.get("line_terminator", None) + with fs.open(path=path, mode="w", encoding=encoding, newline=newline) as f: if file_format == "csv": df.to_csv(f, **pandas_kwargs) elif file_format == "json": df.to_json(f, 
**pandas_kwargs) -def to_parquet( # pylint: disable=too-many-arguments +def to_parquet( # pylint: disable=too-many-arguments,too-many-locals df: pd.DataFrame, path: str, index: bool = False, @@ -1153,9 +1157,14 @@ def to_parquet( # pylint: disable=too-many-arguments "arguments: database, table, description, parameters, " "columns_comments." ) + df = _data_types.cast_pandas_with_athena_types(df=df, dtype=dtype) + schema: pa.Schema = _data_types.pyarrow_schema_from_pandas( + df=df, index=index, ignore_cols=partition_cols, dtype=dtype + ) + _logger.debug("schema: \n%s", schema) paths = [ _to_parquet_file( - df=df, path=path, schema=None, index=index, compression=compression, cpus=cpus, fs=fs, dtype=dtype + df=df, path=path, schema=schema, index=index, compression=compression, cpus=cpus, fs=fs, dtype=dtype ) ] else: @@ -1314,6 +1323,7 @@ def read_csv( boto3_session: Optional[boto3.Session] = None, s3_additional_kwargs: Optional[Dict[str, str]] = None, chunksize: Optional[int] = None, + dataset: bool = False, **pandas_kwargs, ) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]: """Read CSV file(s) from from a received S3 prefix or list of S3 objects paths. @@ -1340,6 +1350,8 @@ def read_csv( https://s3fs.readthedocs.io/en/latest/#serverside-encryption chunksize: int, optional If specified, return an generator where chunksize is the number of rows to include in each chunk. + dataset: bool + If `True` read a CSV dataset instead of simple file(s) loading all the related partitions as columns. pandas_kwargs: keyword arguments forwarded to pandas.read_csv(). https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html @@ -1387,6 +1399,7 @@ def read_csv( boto3_session=boto3_session, s3_additional_kwargs=s3_additional_kwargs, chunksize=chunksize, + dataset=dataset, **pandas_kwargs, ) @@ -1397,6 +1410,7 @@ def read_fwf( boto3_session: Optional[boto3.Session] = None, s3_additional_kwargs: Optional[Dict[str, str]] = None, chunksize: Optional[int] = None, + dataset: bool = False, **pandas_kwargs, ) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]: """Read fixed-width formatted file(s) from from a received S3 prefix or list of S3 objects paths. @@ -1423,6 +1437,8 @@ def read_fwf( https://s3fs.readthedocs.io/en/latest/#serverside-encryption chunksize: int, optional If specified, return an generator where chunksize is the number of rows to include in each chunk. + dataset: bool + If `True` read a FWF dataset instead of simple file(s) loading all the related partitions as columns. pandas_kwargs: keyword arguments forwarded to pandas.read_fwf(). https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_fwf.html @@ -1470,6 +1486,7 @@ def read_fwf( boto3_session=boto3_session, s3_additional_kwargs=s3_additional_kwargs, chunksize=chunksize, + dataset=dataset, **pandas_kwargs, ) @@ -1480,6 +1497,7 @@ def read_json( boto3_session: Optional[boto3.Session] = None, s3_additional_kwargs: Optional[Dict[str, str]] = None, chunksize: Optional[int] = None, + dataset: bool = False, **pandas_kwargs, ) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]: """Read JSON file(s) from from a received S3 prefix or list of S3 objects paths. @@ -1506,6 +1524,9 @@ def read_json( https://s3fs.readthedocs.io/en/latest/#serverside-encryption chunksize: int, optional If specified, return an generator where chunksize is the number of rows to include in each chunk. + dataset: bool + If `True` read a JSON dataset instead of simple file(s) loading all the related partitions as columns. 
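
The `dataset` flag added to `read_csv`, `read_fwf`, and `read_json` above makes the readers treat the path as a Hive-partitioned prefix: each object's key is run through the new `extract_partitions_from_path` helper and the recovered partition values are appended to the resulting DataFrame as columns. A minimal sketch, assuming a hypothetical bucket that already holds a partitioned CSV layout:

```python
import awswrangler as wr

# Hypothetical, pre-existing layout (bucket and keys are illustrative only):
#   s3://my-bucket/csv_dataset/year=2019/part-0.csv
#   s3://my-bucket/csv_dataset/year=2020/part-0.csv

# dataset=True loads every CSV under the prefix and adds a "year" column
# parsed from the partition directories (e.g. {"year": "2020"}).
df = wr.s3.read_csv(path="s3://my-bucket/csv_dataset/", dataset=True)

# For JSON datasets the same flag also implies lines=True unless overridden:
# df = wr.s3.read_json(path="s3://my-bucket/json_dataset/", dataset=True)
print(df["year"].unique())
```
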
+ If `True`, the `lines=True` will be assumed by default. pandas_kwargs: keyword arguments forwarded to pandas.read_json(). https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_json.html @@ -1546,6 +1567,8 @@ def read_json( >>> print(df) # 100 lines Pandas DataFrame """ + if (dataset is True) and ("lines" not in pandas_kwargs): + pandas_kwargs["lines"] = True return _read_text( parser_func=pd.read_json, path=path, @@ -1553,6 +1576,7 @@ def read_json( boto3_session=boto3_session, s3_additional_kwargs=s3_additional_kwargs, chunksize=chunksize, + dataset=dataset, **pandas_kwargs, ) @@ -1564,11 +1588,18 @@ def _read_text( boto3_session: Optional[boto3.Session] = None, s3_additional_kwargs: Optional[Dict[str, str]] = None, chunksize: Optional[int] = None, + dataset: bool = False, **pandas_kwargs, ) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]: if "iterator" in pandas_kwargs: raise exceptions.InvalidArgument("Please, use chunksize instead of iterator.") session: boto3.Session = _utils.ensure_session(session=boto3_session) + if (dataset is True) and (not isinstance(path, str)): # pragma: no cover + raise exceptions.InvalidArgument("The path argument must be a string Amazon S3 prefix if dataset=True.") + if dataset is True: + path_root: str = str(path) + else: + path_root = "" paths: List[str] = _path2list(path=path, boto3_session=session) _logger.debug("paths:\n%s", paths) if chunksize is not None: @@ -1579,6 +1610,8 @@ def _read_text( chunksize=chunksize, pandas_args=pandas_kwargs, s3_additional_kwargs=s3_additional_kwargs, + dataset=dataset, + path_root=path_root, ) return dfs if (use_threads is False) or (boto3_session is not None): @@ -1590,6 +1623,8 @@ def _read_text( boto3_session=session, pandas_args=pandas_kwargs, s3_additional_kwargs=s3_additional_kwargs, + dataset=dataset, + path_root=path_root, ) for p in paths ], @@ -1603,10 +1638,12 @@ def _read_text( objs=executor.map( _read_text_full, repeat(parser_func), + repeat(path_root), paths, repeat(None), # Boto3.Session repeat(pandas_kwargs), repeat(s3_additional_kwargs), + repeat(dataset), ), ignore_index=True, sort=False, @@ -1616,37 +1653,54 @@ def _read_text( def _read_text_chunksize( parser_func: Callable, + path_root: str, paths: List[str], boto3_session: boto3.Session, chunksize: int, pandas_args: Dict[str, Any], s3_additional_kwargs: Optional[Dict[str, str]] = None, + dataset: bool = False, ) -> Iterator[pd.DataFrame]: fs: s3fs.S3FileSystem = _utils.get_fs(session=boto3_session, s3_additional_kwargs=s3_additional_kwargs) for path in paths: _logger.debug("path: %s", path) + partitions: Dict[str, Any] = {} + if dataset is True: + partitions = _utils.extract_partitions_from_path(path_root=path_root, path=path) if pandas_args.get("compression", "infer") == "infer": pandas_args["compression"] = infer_compression(path, compression="infer") mode: str = "r" if pandas_args.get("compression") is None else "rb" with fs.open(path, mode) as f: reader: pandas.io.parsers.TextFileReader = parser_func(f, chunksize=chunksize, **pandas_args) for df in reader: + if dataset is True: + for column_name, value in partitions.items(): + df[column_name] = value yield df def _read_text_full( parser_func: Callable, + path_root: str, path: str, boto3_session: boto3.Session, pandas_args: Dict[str, Any], s3_additional_kwargs: Optional[Dict[str, str]] = None, + dataset: bool = False, ) -> pd.DataFrame: fs: s3fs.S3FileSystem = _utils.get_fs(session=boto3_session, s3_additional_kwargs=s3_additional_kwargs) if pandas_args.get("compression", 
"infer") == "infer": pandas_args["compression"] = infer_compression(path, compression="infer") mode: str = "r" if pandas_args.get("compression") is None else "rb" - with fs.open(path, mode) as f: - return parser_func(f, **pandas_args) + encoding: Optional[str] = pandas_args.get("encoding", None) + newline: Optional[str] = pandas_args.get("lineterminator", None) + with fs.open(path=path, mode=mode, encoding=encoding, newline=newline) as f: + df: pd.DataFrame = parser_func(f, **pandas_args) + if dataset is True: + partitions: Dict[str, Any] = _utils.extract_partitions_from_path(path_root=path_root, path=path) + for column_name, value in partitions.items(): + df[column_name] = value + return df def _read_parquet_init( @@ -1660,14 +1714,15 @@ def _read_parquet_init( s3_additional_kwargs: Optional[Dict[str, str]] = None, ) -> pyarrow.parquet.ParquetDataset: """Encapsulate all initialization before the use of the pyarrow.parquet.ParquetDataset.""" + session: boto3.Session = _utils.ensure_session(session=boto3_session) if dataset is False: - path_or_paths: Union[str, List[str]] = _path2list(path=path, boto3_session=boto3_session) + path_or_paths: Union[str, List[str]] = _path2list(path=path, boto3_session=session) elif isinstance(path, str): path_or_paths = path[:-1] if path.endswith("/") else path else: path_or_paths = path _logger.debug("path_or_paths: %s", path_or_paths) - fs: s3fs.S3FileSystem = _utils.get_fs(session=boto3_session, s3_additional_kwargs=s3_additional_kwargs) + fs: s3fs.S3FileSystem = _utils.get_fs(session=session, s3_additional_kwargs=s3_additional_kwargs) cpus: int = _utils.ensure_cpu_count(use_threads=use_threads) data: pyarrow.parquet.ParquetDataset = pyarrow.parquet.ParquetDataset( path_or_paths=path_or_paths, @@ -1677,6 +1732,7 @@ def _read_parquet_init( read_dictionary=categories, validate_schema=validate_schema, split_row_groups=False, + use_legacy_dataset=True, ) return data @@ -1723,7 +1779,8 @@ def read_parquet( path : Union[str, List[str]] S3 prefix (e.g. s3://bucket/prefix) or list of S3 objects paths (e.g. [s3://bucket/key0, s3://bucket/key1]). filters: Union[List[Tuple], List[List[Tuple]]], optional - List of filters to apply, like ``[[('x', '=', 0), ...], ...]``. + List of filters to apply on PARTITION columns (PUSH-DOWN filter), like ``[[('x', '=', 0), ...], ...]``. + Ignored if `dataset=False`. columns : List[str], optional Names of columns to read from the file(s). 
validate_schema: @@ -1994,7 +2051,7 @@ def _read_parquet_metadata( partitions_types: Optional[Dict[str, str]] = None partitions_values: Optional[Dict[str, List[str]]] = None if (dataset is True) and (_path is not None): - partitions_types, partitions_values = _utils.extract_partitions_from_paths(path=_path, paths=paths) + partitions_types, partitions_values = _utils.extract_partitions_metadata_from_paths(path=_path, paths=paths) if dtype: for k, v in dtype.items(): if columns_types and k in columns_types: diff --git a/requirements-dev.txt b/requirements-dev.txt index 4850658c4..e0abc8e4a 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -19,4 +19,4 @@ wheel~=0.34.2 sphinx~=3.0.4 sphinx_bootstrap_theme~=0.7.1 moto~=1.3.14 -jupyterlab~=2.1.3 \ No newline at end of file +jupyterlab~=2.1.4 \ No newline at end of file diff --git a/testing/test_awswrangler/_utils.py b/testing/test_awswrangler/_utils.py index 40481c689..4375e7ab5 100644 --- a/testing/test_awswrangler/_utils.py +++ b/testing/test_awswrangler/_utils.py @@ -1,9 +1,13 @@ import random +import time from datetime import datetime from decimal import Decimal +import boto3 import pandas as pd +import awswrangler as wr + ts = lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f") # noqa dt = lambda x: datetime.strptime(x, "%Y-%m-%d").date() # noqa @@ -407,3 +411,26 @@ def ensure_data_types_csv(df): def get_time_str_with_random_suffix(): time_str = datetime.utcnow().strftime("%Y%m%d%H%M%S%f") return f"{time_str}_{random.randrange(16**4):04x}" + + +def path_generator(bucket): + s3_path = f"s3://{bucket}/{get_time_str_with_random_suffix()}/" + print(f"S3 Path: {s3_path}") + time.sleep(1) + objs = wr.s3.list_objects(s3_path) + wr.s3.delete_objects(path=objs) + wr.s3.wait_objects_not_exist(objs) + yield s3_path + time.sleep(1) + objs = wr.s3.list_objects(s3_path) + wr.s3.delete_objects(path=objs) + wr.s3.wait_objects_not_exist(objs) + + +def extract_cloudformation_outputs(): + response = boto3.client("cloudformation").describe_stacks(StackName="aws-data-wrangler") + stack = [x for x in response.get("Stacks") if x["StackStatus"] in CFN_VALID_STATUS][0] + outputs = {} + for output in stack.get("Outputs"): + outputs[output.get("OutputKey")] = output.get("OutputValue") + return outputs diff --git a/testing/test_awswrangler/test_athena_projection.py b/testing/test_awswrangler/test_athena_projection.py deleted file mode 100644 index 66506ac37..000000000 --- a/testing/test_awswrangler/test_athena_projection.py +++ /dev/null @@ -1,170 +0,0 @@ -import logging -import time - -import boto3 -import pandas as pd -import pytest - -import awswrangler as wr - -from ._utils import CFN_VALID_STATUS, dt, get_time_str_with_random_suffix, ts - -logging.basicConfig(level=logging.INFO, format="[%(asctime)s][%(levelname)s][%(name)s][%(funcName)s] %(message)s") -logging.getLogger("awswrangler").setLevel(logging.DEBUG) -logging.getLogger("botocore.credentials").setLevel(logging.CRITICAL) - - -@pytest.fixture(scope="module") -def cloudformation_outputs(): - response = boto3.client("cloudformation").describe_stacks(StackName="aws-data-wrangler") - stack = [x for x in response.get("Stacks") if x["StackStatus"] in CFN_VALID_STATUS][0] - outputs = {} - for output in stack.get("Outputs"): - outputs[output.get("OutputKey")] = output.get("OutputValue") - yield outputs - - -@pytest.fixture(scope="module") -def region(cloudformation_outputs): - yield cloudformation_outputs["Region"] - - -@pytest.fixture(scope="module") -def bucket(cloudformation_outputs): - yield 
cloudformation_outputs["BucketName"] - - -@pytest.fixture(scope="module") -def database(cloudformation_outputs): - yield cloudformation_outputs["GlueDatabaseName"] - - -@pytest.fixture(scope="module") -def external_schema(cloudformation_outputs, database): - region = cloudformation_outputs.get("Region") - sql = f""" - CREATE EXTERNAL SCHEMA IF NOT EXISTS aws_data_wrangler_external FROM data catalog - DATABASE '{database}' - IAM_ROLE '{cloudformation_outputs["RedshiftRole"]}' - REGION '{region}'; - """ - engine = wr.catalog.get_engine(connection="aws-data-wrangler-redshift") - with engine.connect() as con: - con.execute(sql) - yield "aws_data_wrangler_external" - - -@pytest.fixture(scope="function") -def path(bucket): - s3_path = f"s3://{bucket}/{get_time_str_with_random_suffix()}/" - print(f"S3 Path: {s3_path}") - time.sleep(1) - objs = wr.s3.list_objects(s3_path) - wr.s3.delete_objects(path=objs) - wr.s3.wait_objects_not_exist(objs) - yield s3_path - time.sleep(1) - objs = wr.s3.list_objects(s3_path) - wr.s3.delete_objects(path=objs) - wr.s3.wait_objects_not_exist(objs) - - -@pytest.fixture(scope="function") -def table(database): - name = f"tbl_{get_time_str_with_random_suffix()}" - print(f"Table name: {name}") - wr.catalog.delete_table_if_exists(database=database, table=name) - yield name - wr.catalog.delete_table_if_exists(database=database, table=name) - - -def test_to_parquet_projection_integer(database, table, path): - df = pd.DataFrame({"c0": [0, 1, 2], "c1": [0, 1, 2], "c2": [0, 100, 200], "c3": [0, 1, 2]}) - paths = wr.s3.to_parquet( - df=df, - path=path, - dataset=True, - database=database, - table=table, - partition_cols=["c1", "c2", "c3"], - regular_partitions=False, - projection_enabled=True, - projection_types={"c1": "integer", "c2": "integer", "c3": "integer"}, - projection_ranges={"c1": "0,2", "c2": "0,200", "c3": "0,2"}, - projection_intervals={"c2": "100"}, - projection_digits={"c3": "1"}, - )["paths"] - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df2 = wr.athena.read_sql_table(table, database) - assert df.shape == df2.shape - assert df.c0.sum() == df2.c0.sum() - assert df.c1.sum() == df2.c1.sum() - assert df.c2.sum() == df2.c2.sum() - assert df.c3.sum() == df2.c3.sum() - - -def test_to_parquet_projection_enum(database, table, path): - df = pd.DataFrame({"c0": [0, 1, 2], "c1": [1, 2, 3], "c2": ["foo", "boo", "bar"]}) - paths = wr.s3.to_parquet( - df=df, - path=path, - dataset=True, - database=database, - table=table, - partition_cols=["c1", "c2"], - regular_partitions=False, - projection_enabled=True, - projection_types={"c1": "enum", "c2": "enum"}, - projection_values={"c1": "1,2,3", "c2": "foo,boo,bar"}, - )["paths"] - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df2 = wr.athena.read_sql_table(table, database) - assert df.shape == df2.shape - assert df.c0.sum() == df2.c0.sum() - assert df.c1.sum() == df2.c1.sum() - - -def test_to_parquet_projection_date(database, table, path): - df = pd.DataFrame( - { - "c0": [0, 1, 2], - "c1": [dt("2020-01-01"), dt("2020-01-02"), dt("2020-01-03")], - "c2": [ts("2020-01-01 01:01:01.0"), ts("2020-01-01 01:01:02.0"), ts("2020-01-01 01:01:03.0")], - } - ) - paths = wr.s3.to_parquet( - df=df, - path=path, - dataset=True, - database=database, - table=table, - partition_cols=["c1", "c2"], - regular_partitions=False, - projection_enabled=True, - projection_types={"c1": "date", "c2": "date"}, - projection_ranges={"c1": "2020-01-01,2020-01-03", "c2": "2020-01-01 01:01:00,2020-01-01 01:01:03"}, - )["paths"] - 
wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df2 = wr.athena.read_sql_table(table, database) - print(df2) - assert df.shape == df2.shape - assert df.c0.sum() == df2.c0.sum() - - -def test_to_parquet_projection_injected(database, table, path): - df = pd.DataFrame({"c0": [0, 1, 2], "c1": ["foo", "boo", "bar"], "c2": ["0", "1", "2"]}) - paths = wr.s3.to_parquet( - df=df, - path=path, - dataset=True, - database=database, - table=table, - partition_cols=["c1", "c2"], - regular_partitions=False, - projection_enabled=True, - projection_types={"c1": "injected", "c2": "injected"}, - )["paths"] - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df2 = wr.athena.read_sql_query(f"SELECT * FROM {table} WHERE c1='foo' AND c2='0'", database) - assert df2.shape == (1, 3) - assert df2.c0.iloc[0] == 0 diff --git a/testing/test_awswrangler/test_cloudwatch.py b/testing/test_awswrangler/test_cloudwatch.py index 6ac4e527a..592080510 100644 --- a/testing/test_awswrangler/test_cloudwatch.py +++ b/testing/test_awswrangler/test_cloudwatch.py @@ -7,7 +7,7 @@ import awswrangler as wr from awswrangler import exceptions -from ._utils import CFN_VALID_STATUS +from ._utils import extract_cloudformation_outputs logging.basicConfig(level=logging.INFO, format="[%(asctime)s][%(levelname)s][%(name)s][%(funcName)s] %(message)s") logging.getLogger("awswrangler").setLevel(logging.DEBUG) @@ -16,12 +16,7 @@ @pytest.fixture(scope="module") def cloudformation_outputs(): - response = boto3.client("cloudformation").describe_stacks(StackName="aws-data-wrangler") - stack = [x for x in response.get("Stacks") if x["StackStatus"] in CFN_VALID_STATUS][0] - outputs = {} - for output in stack.get("Outputs"): - outputs[output.get("OutputKey")] = output.get("OutputValue") - yield outputs + yield extract_cloudformation_outputs() @pytest.fixture(scope="module") diff --git a/testing/test_awswrangler/test_data_lake.py b/testing/test_awswrangler/test_data_lake.py index bcd6e3f6d..19fb1ca19 100644 --- a/testing/test_awswrangler/test_data_lake.py +++ b/testing/test_awswrangler/test_data_lake.py @@ -1,11 +1,9 @@ import bz2 import datetime import gzip -import itertools import logging import lzma import math -import time from io import BytesIO, TextIOWrapper import boto3 @@ -14,9 +12,20 @@ import awswrangler as wr -from ._utils import (CFN_VALID_STATUS, ensure_data_types, ensure_data_types_category, ensure_data_types_csv, get_df, - get_df_cast, get_df_category, get_df_csv, get_df_list, get_query_long, - get_time_str_with_random_suffix) +from ._utils import ( + ensure_data_types, + ensure_data_types_category, + ensure_data_types_csv, + extract_cloudformation_outputs, + get_df, + get_df_cast, + get_df_category, + get_df_csv, + get_df_list, + get_query_long, + get_time_str_with_random_suffix, + path_generator, +) logging.basicConfig(level=logging.INFO, format="[%(asctime)s][%(levelname)s][%(name)s][%(funcName)s] %(message)s") logging.getLogger("awswrangler").setLevel(logging.DEBUG) @@ -25,12 +34,7 @@ @pytest.fixture(scope="module") def cloudformation_outputs(): - response = boto3.client("cloudformation").describe_stacks(StackName="aws-data-wrangler") - stack = [x for x in response.get("Stacks") if x["StackStatus"] in CFN_VALID_STATUS][0] - outputs = {} - for output in stack.get("Outputs"): - outputs[output.get("OutputKey")] = output.get("OutputValue") - yield outputs + yield extract_cloudformation_outputs() @pytest.fixture(scope="module") @@ -161,21 +165,6 @@ def workgroup3(bucket, kms_key): yield wkg_name 
-@pytest.fixture(scope="function") -def path(bucket): - s3_path = f"s3://{bucket}/{get_time_str_with_random_suffix()}/" - print(f"S3 Path: {s3_path}") - time.sleep(1) - objs = wr.s3.list_objects(s3_path) - wr.s3.delete_objects(path=objs) - wr.s3.wait_objects_not_exist(objs) - yield s3_path - time.sleep(1) - objs = wr.s3.list_objects(s3_path) - wr.s3.delete_objects(path=objs) - wr.s3.wait_objects_not_exist(objs) - - @pytest.fixture(scope="function") def table(database): name = f"tbl_{get_time_str_with_random_suffix()}" @@ -185,21 +174,6 @@ def table(database): wr.catalog.delete_table_if_exists(database=database, table=name) -@pytest.fixture(scope="function") -def path2(bucket): - s3_path = f"s3://{bucket}/{get_time_str_with_random_suffix()}/" - print(f"S3 Path: {s3_path}") - time.sleep(1) - objs = wr.s3.list_objects(s3_path) - wr.s3.delete_objects(path=objs) - wr.s3.wait_objects_not_exist(objs) - yield s3_path - time.sleep(1) - objs = wr.s3.list_objects(s3_path) - wr.s3.delete_objects(path=objs) - wr.s3.wait_objects_not_exist(objs) - - @pytest.fixture(scope="function") def table2(database): name = f"tbl_{get_time_str_with_random_suffix()}" @@ -209,1976 +183,1815 @@ def table2(database): wr.catalog.delete_table_if_exists(database=database, table=name) -def test_athena_ctas(bucket, database, kms_key): - wr.s3.delete_objects(path=f"s3://{bucket}/test_athena_ctas/") - wr.s3.delete_objects(path=f"s3://{bucket}/test_athena_ctas_result/") - df = get_df_list() - columns_types, partitions_types = wr.catalog.extract_athena_types(df=df, partition_cols=["par0", "par1"]) - assert len(columns_types) == 16 - assert len(partitions_types) == 2 - with pytest.raises(wr.exceptions.InvalidArgumentValue): - wr.catalog.extract_athena_types(df=df, file_format="avro") +@pytest.fixture(scope="function") +def path(bucket): + yield from path_generator(bucket) + + +@pytest.fixture(scope="function") +def path2(bucket): + yield from path_generator(bucket) + + +@pytest.fixture(scope="function") +def path3(bucket): + yield from path_generator(bucket) + + +def test_to_parquet_modes(database, table, path, external_schema): + + # Round 1 - Warm up + df = pd.DataFrame({"c0": [0, None]}, dtype="Int64") paths = wr.s3.to_parquet( - df=get_df_list(), - path=f"s3://{bucket}/test_athena_ctas", - index=True, - use_threads=True, + df=df, + path=path, dataset=True, mode="overwrite", database=database, - table="test_athena_ctas", - partition_cols=["par0", "par1"], + table=table, + description="c0", + parameters={"num_cols": str(len(df.columns)), "num_rows": str(len(df.index))}, + columns_comments={"c0": "0"}, )["paths"] wr.s3.wait_objects_exist(paths=paths) - dirs = wr.s3.list_directories(path=f"s3://{bucket}/test_athena_ctas/") - for d in dirs: - assert d.startswith(f"s3://{bucket}/test_athena_ctas/par0=") - df = wr.s3.read_parquet_table(table="test_athena_ctas", database=database) - assert len(df.index) == 3 - ensure_data_types(df=df, has_list=True) - df = wr.athena.read_sql_table( - table="test_athena_ctas", - database=database, - ctas_approach=True, - encryption="SSE_KMS", - kms_key=kms_key, - s3_output=f"s3://{bucket}/test_athena_ctas_result", - keep_files=False, - ) - assert len(df.index) == 3 - ensure_data_types(df=df, has_list=True) - temp_table = "test_athena_ctas2" - s3_output = f"s3://{bucket}/s3_output/" - final_destination = f"{s3_output}{temp_table}/" + df2 = wr.athena.read_sql_table(table, database) + assert df.shape == df2.shape + assert df.c0.sum() == df2.c0.sum() + parameters = wr.catalog.get_table_parameters(database, 
table) + assert len(parameters) >= 5 + assert parameters["num_cols"] == str(len(df2.columns)) + assert parameters["num_rows"] == str(len(df2.index)) + assert wr.catalog.get_table_description(database, table) == "c0" + comments = wr.catalog.get_columns_comments(database, table) + assert len(comments) == len(df.columns) + assert comments["c0"] == "0" - # keep_files=False - wr.s3.delete_objects(path=s3_output) - dfs = wr.athena.read_sql_query( - sql="SELECT * FROM test_athena_ctas", + # Round 2 - Overwrite + df = pd.DataFrame({"c1": [None, 1, None]}, dtype="Int16") + paths = wr.s3.to_parquet( + df=df, + path=path, + dataset=True, + mode="overwrite", database=database, - ctas_approach=True, - chunksize=1, - keep_files=False, - ctas_temp_table_name=temp_table, - s3_output=s3_output, - ) - assert wr.catalog.does_table_exist(database=database, table=temp_table) is False - assert len(wr.s3.list_objects(path=s3_output)) > 2 - assert len(wr.s3.list_objects(path=final_destination)) > 0 - for df in dfs: - ensure_data_types(df=df, has_list=True) - assert len(wr.s3.list_objects(path=s3_output)) == 0 + table=table, + description="c1", + parameters={"num_cols": str(len(df.columns)), "num_rows": str(len(df.index))}, + columns_comments={"c1": "1"}, + )["paths"] + wr.s3.wait_objects_exist(paths=paths) + df2 = wr.athena.read_sql_table(table, database) + assert df.shape == df2.shape + assert df.c1.sum() == df2.c1.sum() + parameters = wr.catalog.get_table_parameters(database, table) + assert len(parameters) >= 5 + assert parameters["num_cols"] == str(len(df2.columns)) + assert parameters["num_rows"] == str(len(df2.index)) + assert wr.catalog.get_table_description(database, table) == "c1" + comments = wr.catalog.get_columns_comments(database, table) + assert len(comments) == len(df.columns) + assert comments["c1"] == "1" - # keep_files=True - wr.s3.delete_objects(path=s3_output) - dfs = wr.athena.read_sql_query( - sql="SELECT * FROM test_athena_ctas", + # Round 3 - Append + df = pd.DataFrame({"c1": [None, 2, None]}, dtype="Int8") + paths = wr.s3.to_parquet( + df=df, + path=path, + dataset=True, + mode="append", database=database, - ctas_approach=True, - chunksize=2, - keep_files=True, - ctas_temp_table_name=temp_table, - s3_output=s3_output, - ) - assert wr.catalog.does_table_exist(database=database, table=temp_table) is False - assert len(wr.s3.list_objects(path=s3_output)) > 2 - assert len(wr.s3.list_objects(path=final_destination)) > 0 - for df in dfs: - ensure_data_types(df=df, has_list=True) - assert len(wr.s3.list_objects(path=s3_output)) > 2 + table=table, + description="c1", + parameters={"num_cols": str(len(df.columns)), "num_rows": str(len(df.index) * 2)}, + columns_comments={"c1": "1"}, + )["paths"] + wr.s3.wait_objects_exist(paths=paths) + df2 = wr.athena.read_sql_table(table, database) + assert len(df.columns) == len(df2.columns) + assert len(df.index) * 2 == len(df2.index) + assert df.c1.sum() + 1 == df2.c1.sum() + parameters = wr.catalog.get_table_parameters(database, table) + assert len(parameters) >= 5 + assert parameters["num_cols"] == str(len(df2.columns)) + assert parameters["num_rows"] == str(len(df2.index)) + assert wr.catalog.get_table_description(database, table) == "c1" + comments = wr.catalog.get_columns_comments(database, table) + assert len(comments) == len(df.columns) + assert comments["c1"] == "1" - # Cleaning Up - wr.catalog.delete_table_if_exists(database=database, table="test_athena_ctas") - wr.s3.delete_objects(path=paths) - wr.s3.wait_objects_not_exist(paths=paths) - 
wr.s3.delete_objects(path=f"s3://{bucket}/test_athena_ctas_result/") + # Round 4 - Append + New Column + df = pd.DataFrame({"c2": ["a", None, "b"], "c1": [None, None, None]}) + paths = wr.s3.to_parquet( + df=df, + path=path, + dataset=True, + mode="append", + database=database, + table=table, + description="c1+c2", + parameters={"num_cols": "2", "num_rows": "9"}, + columns_comments={"c1": "1", "c2": "2"}, + )["paths"] + wr.s3.wait_objects_exist(paths=paths) + df2 = wr.athena.read_sql_table(table, database) + assert len(df2.columns) == 2 + assert len(df2.index) == 9 + assert df2.c1.sum() == 3 + parameters = wr.catalog.get_table_parameters(database, table) + assert len(parameters) >= 5 + assert parameters["num_cols"] == "2" + assert parameters["num_rows"] == "9" + assert wr.catalog.get_table_description(database, table) == "c1+c2" + comments = wr.catalog.get_columns_comments(database, table) + assert len(comments) == len(df.columns) + assert comments["c1"] == "1" + assert comments["c2"] == "2" + # Round 5 - Append + New Column + Wrong Types + df = pd.DataFrame({"c2": [1], "c3": [True], "c1": ["1"]}) + paths = wr.s3.to_parquet( + df=df, + path=path, + dataset=True, + mode="append", + database=database, + table=table, + description="c1+c2+c3", + parameters={"num_cols": "3", "num_rows": "10"}, + columns_comments={"c1": "1!", "c2": "2!", "c3": "3"}, + )["paths"] + wr.s3.wait_objects_exist(paths=paths) + df2 = wr.athena.read_sql_table(table, database) + assert len(df2.columns) == 3 + assert len(df2.index) == 10 + assert df2.c1.sum() == 4 + parameters = wr.catalog.get_table_parameters(database, table) + assert len(parameters) >= 5 + assert parameters["num_cols"] == "3" + assert parameters["num_rows"] == "10" + assert wr.catalog.get_table_description(database, table) == "c1+c2+c3" + comments = wr.catalog.get_columns_comments(database, table) + assert len(comments) == len(df.columns) + assert comments["c1"] == "1!" + assert comments["c2"] == "2!" 
+ assert comments["c3"] == "3" + engine = wr.catalog.get_engine("aws-data-wrangler-redshift") + df3 = wr.db.read_sql_table(con=engine, table=table, schema=external_schema) + assert len(df3.columns) == 3 + assert len(df3.index) == 10 + assert df3.c1.sum() == 4 -def test_athena(path, database, kms_key, workgroup0, workgroup1): - wr.catalog.delete_table_if_exists(database=database, table="__test_athena") + # Round 6 - Overwrite Partitioned + df = pd.DataFrame({"c0": ["foo", None], "c1": [0, 1]}) paths = wr.s3.to_parquet( - df=get_df(), + df=df, path=path, - index=True, - use_threads=True, dataset=True, mode="overwrite", database=database, - table="__test_athena", - partition_cols=["par0", "par1"], + table=table, + partition_cols=["c1"], + description="c0+c1", + parameters={"num_cols": "2", "num_rows": "2"}, + columns_comments={"c0": "zero", "c1": "one"}, )["paths"] - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - dfs = wr.athena.read_sql_query( - sql="SELECT * FROM __test_athena", - database=database, - ctas_approach=False, - chunksize=1, - encryption="SSE_KMS", - kms_key=kms_key, - workgroup=workgroup0, - keep_files=False, - ) - for df2 in dfs: - print(df2) - ensure_data_types(df=df2) - df = wr.athena.read_sql_query( - sql="SELECT * FROM __test_athena", - database=database, - ctas_approach=False, - workgroup=workgroup1, - keep_files=False, - ) - assert len(df.index) == 3 - ensure_data_types(df=df) - wr.athena.repair_table(table="__test_athena", database=database) - wr.catalog.delete_table_if_exists(database=database, table="__test_athena") - + wr.s3.wait_objects_exist(paths=paths) + df2 = wr.athena.read_sql_table(table, database) + assert df.shape == df2.shape + assert df.c1.sum() == df2.c1.sum() + parameters = wr.catalog.get_table_parameters(database, table) + assert len(parameters) >= 5 + assert parameters["num_cols"] == "2" + assert parameters["num_rows"] == "2" + assert wr.catalog.get_table_description(database, table) == "c0+c1" + comments = wr.catalog.get_columns_comments(database, table) + assert len(comments) == len(df.columns) + assert comments["c0"] == "zero" + assert comments["c1"] == "one" -def test_csv(bucket): - session = boto3.Session() - df = pd.DataFrame({"id": [1, 2, 3]}) - path0 = f"s3://{bucket}/test_csv0.csv" - path1 = f"s3://{bucket}/test_csv1.csv" - path2 = f"s3://{bucket}/test_csv2.csv" - wr.s3.to_csv(df=df, path=path0, index=False) - wr.s3.wait_objects_exist(paths=[path0]) - assert wr.s3.does_object_exist(path=path0) is True - assert wr.s3.size_objects(path=[path0], use_threads=False)[path0] == 9 - assert wr.s3.size_objects(path=[path0], use_threads=True)[path0] == 9 - wr.s3.to_csv(df=df, path=path1, index=False, boto3_session=None) - wr.s3.to_csv(df=df, path=path2, index=False, boto3_session=session) - assert df.equals(wr.s3.read_csv(path=path0, use_threads=False)) - assert df.equals(wr.s3.read_csv(path=path0, use_threads=True)) - assert df.equals(wr.s3.read_csv(path=path0, use_threads=False, boto3_session=session)) - assert df.equals(wr.s3.read_csv(path=path0, use_threads=True, boto3_session=session)) - paths = [path0, path1, path2] - df2 = pd.concat(objs=[df, df, df], sort=False, ignore_index=True) - assert df2.equals(wr.s3.read_csv(path=paths, use_threads=False)) - assert df2.equals(wr.s3.read_csv(path=paths, use_threads=True)) - assert df2.equals(wr.s3.read_csv(path=paths, use_threads=False, boto3_session=session)) - assert df2.equals(wr.s3.read_csv(path=paths, use_threads=True, boto3_session=session)) - with 
pytest.raises(wr.exceptions.InvalidArgumentType): - wr.s3.read_csv(path=1) - with pytest.raises(wr.exceptions.InvalidArgument): - wr.s3.read_csv(path=paths, iterator=True) - wr.s3.delete_objects(path=paths, use_threads=False) - wr.s3.wait_objects_not_exist(paths=paths, use_threads=False) - - -def test_json(bucket): - df0 = pd.DataFrame({"id": [1, 2, 3]}) - path0 = f"s3://{bucket}/test_json0.json" - path1 = f"s3://{bucket}/test_json1.json" - wr.s3.to_json(df=df0, path=path0) - wr.s3.to_json(df=df0, path=path1) - wr.s3.wait_objects_exist(paths=[path0, path1]) - assert df0.equals(wr.s3.read_json(path=path0, use_threads=False)) - df1 = pd.concat(objs=[df0, df0], sort=False, ignore_index=True) - assert df1.equals(wr.s3.read_json(path=[path0, path1], use_threads=True)) - wr.s3.delete_objects(path=[path0, path1], use_threads=False) - - -def test_fwf(path): - text = "1 Herfelingen27-12-18\n2 Lambusart14-06-18\n3Spormaggiore15-04-18" - client_s3 = boto3.client("s3") - path0 = f"{path}/0.txt" - bucket, key = wr._utils.parse_path(path0) - client_s3.put_object(Body=text, Bucket=bucket, Key=key) - path1 = f"{path}/1.txt" - bucket, key = wr._utils.parse_path(path1) - client_s3.put_object(Body=text, Bucket=bucket, Key=key) - wr.s3.wait_objects_exist(paths=[path0, path1]) - df = wr.s3.read_fwf(path=path0, use_threads=False, widths=[1, 12, 8], names=["id", "name", "date"]) - assert len(df.index) == 3 - assert len(df.columns) == 3 - df = wr.s3.read_fwf(path=[path0, path1], use_threads=True, widths=[1, 12, 8], names=["id", "name", "date"]) - assert len(df.index) == 6 - assert len(df.columns) == 3 - - -def test_parquet(bucket): - wr.s3.delete_objects(path=f"s3://{bucket}/test_parquet/") - df_file = pd.DataFrame({"id": [1, 2, 3]}) - path_file = f"s3://{bucket}/test_parquet/test_parquet_file.parquet" - df_dataset = pd.DataFrame({"id": [1, 2, 3], "partition": ["A", "A", "B"]}) - df_dataset["partition"] = df_dataset["partition"].astype("category") - path_dataset = f"s3://{bucket}/test_parquet/test_parquet_dataset" - with pytest.raises(wr.exceptions.InvalidArgumentCombination): - wr.s3.to_parquet(df=df_file, path=path_file, mode="append") - with pytest.raises(wr.exceptions.InvalidCompression): - wr.s3.to_parquet(df=df_file, path=path_file, compression="WRONG") - with pytest.raises(wr.exceptions.InvalidArgumentCombination): - wr.s3.to_parquet(df=df_dataset, path=path_dataset, partition_cols=["col2"]) - with pytest.raises(wr.exceptions.InvalidArgumentCombination): - wr.s3.to_parquet(df=df_dataset, path=path_dataset, description="foo") - with pytest.raises(wr.exceptions.InvalidArgumentValue): - wr.s3.to_parquet(df=df_dataset, path=path_dataset, partition_cols=["col2"], dataset=True, mode="WRONG") - paths = wr.s3.to_parquet(df=df_file, path=path_file)["paths"] - wr.s3.wait_objects_exist(paths=paths) - assert len(wr.s3.read_parquet(path=path_file, use_threads=True, boto3_session=None).index) == 3 - assert len(wr.s3.read_parquet(path=[path_file], use_threads=False, boto3_session=boto3.Session()).index) == 3 - paths = wr.s3.to_parquet(df=df_dataset, path=path_dataset, dataset=True)["paths"] - wr.s3.wait_objects_exist(paths=paths) - assert len(wr.s3.read_parquet(path=paths, dataset=True).index) == 3 - assert len(wr.s3.read_parquet(path=path_dataset, use_threads=True, boto3_session=boto3.Session()).index) == 3 - dataset_paths = wr.s3.to_parquet( - df=df_dataset, path=path_dataset, dataset=True, partition_cols=["partition"], mode="overwrite" + # Round 7 - Overwrite Partitions + df = pd.DataFrame({"c0": [None, None], "c1": 
[0, 2]}) + paths = wr.s3.to_parquet( + df=df, + path=path, + dataset=True, + mode="overwrite_partitions", + database=database, + table=table, + partition_cols=["c1"], + description="c0+c1", + parameters={"num_cols": "2", "num_rows": "3"}, + columns_comments={"c0": "zero", "c1": "one"}, )["paths"] - wr.s3.wait_objects_exist(paths=dataset_paths) - assert len(wr.s3.read_parquet(path=path_dataset, use_threads=True, boto3_session=None).index) == 3 - assert len(wr.s3.read_parquet(path=dataset_paths, use_threads=True).index) == 3 - assert len(wr.s3.read_parquet(path=path_dataset, dataset=True, use_threads=True).index) == 3 - wr.s3.to_parquet(df=df_dataset, path=path_dataset, dataset=True, partition_cols=["partition"], mode="overwrite") - wr.s3.to_parquet( - df=df_dataset, path=path_dataset, dataset=True, partition_cols=["partition"], mode="overwrite_partitions" - ) - wr.s3.delete_objects(path=f"s3://{bucket}/test_parquet/") - + wr.s3.wait_objects_exist(paths=paths) + df2 = wr.athena.read_sql_table(table, database) + assert len(df2.columns) == 2 + assert len(df2.index) == 3 + assert df2.c1.sum() == 3 + parameters = wr.catalog.get_table_parameters(database, table) + assert len(parameters) >= 5 + assert parameters["num_cols"] == "2" + assert parameters["num_rows"] == "3" + assert wr.catalog.get_table_description(database, table) == "c0+c1" + comments = wr.catalog.get_columns_comments(database, table) + assert len(comments) == len(df.columns) + assert comments["c0"] == "zero" + assert comments["c1"] == "one" -def test_parquet_catalog(bucket, database): - with pytest.raises(wr.exceptions.UndetectedType): - wr.s3.to_parquet( - df=pd.DataFrame({"A": [None]}), - path=f"s3://{bucket}/test_parquet_catalog", - dataset=True, - database=database, - table="test_parquet_catalog", - ) - df = get_df_list() - with pytest.raises(wr.exceptions.InvalidArgumentCombination): - wr.s3.to_parquet( - df=df, - path=f"s3://{bucket}/test_parquet_catalog", - use_threads=True, - dataset=False, - mode="overwrite", - database=database, - table="test_parquet_catalog", - ) - with pytest.raises(wr.exceptions.InvalidArgumentCombination): - wr.s3.to_parquet( - df=df, - path=f"s3://{bucket}/test_parquet_catalog", - use_threads=True, - dataset=False, - table="test_parquet_catalog", - ) - with pytest.raises(wr.exceptions.InvalidArgumentCombination): - wr.s3.to_parquet( - df=df, - path=f"s3://{bucket}/test_parquet_catalog", - use_threads=True, - dataset=True, - mode="overwrite", - database=database, - ) - wr.s3.to_parquet( + # Round 8 - Overwrite Partitions + New Column + Wrong Type + df = pd.DataFrame({"c0": [1, 2], "c1": ["1", "3"], "c2": [True, False]}) + paths = wr.s3.to_parquet( df=df, - path=f"s3://{bucket}/test_parquet_catalog", - use_threads=True, + path=path, dataset=True, - mode="overwrite", + mode="overwrite_partitions", database=database, - table="test_parquet_catalog", - ) - wr.s3.to_parquet( - df=df, - path=f"s3://{bucket}/test_parquet_catalog2", - index=True, - use_threads=True, + table=table, + partition_cols=["c1"], + description="c0+c1+c2", + parameters={"num_cols": "3", "num_rows": "4"}, + columns_comments={"c0": "zero", "c1": "one", "c2": "two"}, + )["paths"] + wr.s3.wait_objects_exist(paths=paths) + df2 = wr.athena.read_sql_table(table, database) + assert len(df2.columns) == 3 + assert len(df2.index) == 4 + assert df2.c1.sum() == 6 + parameters = wr.catalog.get_table_parameters(database, table) + assert len(parameters) >= 5 + assert parameters["num_cols"] == "3" + assert parameters["num_rows"] == "4" + assert 
wr.catalog.get_table_description(database, table) == "c0+c1+c2" + comments = wr.catalog.get_columns_comments(database, table) + assert len(comments) == len(df.columns) + assert comments["c0"] == "zero" + assert comments["c1"] == "one" + assert comments["c2"] == "two" + engine = wr.catalog.get_engine("aws-data-wrangler-redshift") + df3 = wr.db.read_sql_table(con=engine, table=table, schema=external_schema) + assert len(df3.columns) == 3 + assert len(df3.index) == 4 + assert df3.c1.sum() == 6 + + +def test_store_parquet_metadata_modes(database, table, path, external_schema): + + # Round 1 - Warm up + df = pd.DataFrame({"c0": [0, None]}, dtype="Int64") + paths = wr.s3.to_parquet(df=df, path=path, dataset=True, mode="overwrite")["paths"] + wr.s3.wait_objects_exist(paths=paths) + wr.s3.store_parquet_metadata( + path=path, dataset=True, mode="overwrite", database=database, - table="test_parquet_catalog2", - partition_cols=["iint8", "iint16"], - ) - columns_types, partitions_types = wr.s3.read_parquet_metadata( - path=f"s3://{bucket}/test_parquet_catalog2", dataset=True - ) - assert len(columns_types) == 17 - assert len(partitions_types) == 2 - columns_types, partitions_types, partitions_values = wr.s3.store_parquet_metadata( - path=f"s3://{bucket}/test_parquet_catalog2", database=database, table="test_parquet_catalog2", dataset=True + table=table, + description="c0", + parameters={"num_cols": str(len(df.columns)), "num_rows": str(len(df.index))}, + columns_comments={"c0": "0"}, ) - assert len(columns_types) == 17 - assert len(partitions_types) == 2 - assert len(partitions_values) == 2 - wr.s3.delete_objects(path=f"s3://{bucket}/test_parquet_catalog/") - wr.s3.delete_objects(path=f"s3://{bucket}/test_parquet_catalog2/") - assert wr.catalog.delete_table_if_exists(database=database, table="test_parquet_catalog") is True - assert wr.catalog.delete_table_if_exists(database=database, table="test_parquet_catalog2") is True - + df2 = wr.athena.read_sql_table(table, database) + assert df.shape == df2.shape + assert df.c0.sum() == df2.c0.sum() + parameters = wr.catalog.get_table_parameters(database, table) + assert len(parameters) >= 5 + assert parameters["num_cols"] == str(len(df2.columns)) + assert parameters["num_rows"] == str(len(df2.index)) + assert wr.catalog.get_table_description(database, table) == "c0" + comments = wr.catalog.get_columns_comments(database, table) + assert len(comments) == len(df.columns) + assert comments["c0"] == "0" -def test_parquet_catalog_duplicated(bucket, database): - path = f"s3://{bucket}/test_parquet_catalog_dedup/" - df = pd.DataFrame({"A": [1], "a": [1]}) - wr.s3.to_parquet( - df=df, + # Round 2 - Overwrite + df = pd.DataFrame({"c1": [None, 1, None]}, dtype="Int16") + paths = wr.s3.to_parquet(df=df, path=path, dataset=True, mode="overwrite")["paths"] + wr.s3.wait_objects_exist(paths=paths) + wr.s3.store_parquet_metadata( path=path, - index=False, dataset=True, mode="overwrite", database=database, - table="test_parquet_catalog_dedup", + table=table, + description="c1", + parameters={"num_cols": str(len(df.columns)), "num_rows": str(len(df.index))}, + columns_comments={"c1": "1"}, ) - df = wr.s3.read_parquet(path=path) - assert len(df.index) == 1 - assert len(df.columns) == 1 - wr.s3.delete_objects(path=path) - assert wr.catalog.delete_table_if_exists(database=database, table="test_parquet_catalog_dedup") is True - + df2 = wr.athena.read_sql_table(table, database) + assert df.shape == df2.shape + assert df.c1.sum() == df2.c1.sum() + parameters = 
wr.catalog.get_table_parameters(database, table) + assert len(parameters) >= 5 + assert parameters["num_cols"] == str(len(df2.columns)) + assert parameters["num_rows"] == str(len(df2.index)) + assert wr.catalog.get_table_description(database, table) == "c1" + comments = wr.catalog.get_columns_comments(database, table) + assert len(comments) == len(df.columns) + assert comments["c1"] == "1" -def test_parquet_catalog_casting(bucket, database): - path = f"s3://{bucket}/test_parquet_catalog_casting/" - paths = wr.s3.to_parquet( - df=get_df_cast(), + # Round 3 - Append + df = pd.DataFrame({"c1": [None, 2, None]}, dtype="Int16") + paths = wr.s3.to_parquet(df=df, path=path, dataset=True, mode="append")["paths"] + wr.s3.wait_objects_exist(paths=paths) + wr.s3.store_parquet_metadata( path=path, - index=False, dataset=True, - mode="overwrite", + mode="append", database=database, - table="__test_parquet_catalog_casting", - dtype={ - "iint8": "tinyint", - "iint16": "smallint", - "iint32": "int", - "iint64": "bigint", - "float": "float", - "double": "double", - "decimal": "decimal(3,2)", - "string": "string", - "date": "date", - "timestamp": "timestamp", - "bool": "boolean", - "binary": "binary", - "category": "double", - "par0": "bigint", - "par1": "string", - }, - )["paths"] - wr.s3.wait_objects_exist(paths=paths) - df = wr.s3.read_parquet(path=path) - assert len(df.index) == 3 - assert len(df.columns) == 15 - ensure_data_types(df=df, has_list=False) - df = wr.athena.read_sql_table(table="__test_parquet_catalog_casting", database=database, ctas_approach=True) - assert len(df.index) == 3 - assert len(df.columns) == 15 - ensure_data_types(df=df, has_list=False) - df = wr.athena.read_sql_table(table="__test_parquet_catalog_casting", database=database, ctas_approach=False) - assert len(df.index) == 3 - assert len(df.columns) == 15 - ensure_data_types(df=df, has_list=False) - wr.s3.delete_objects(path=path) - assert wr.catalog.delete_table_if_exists(database=database, table="__test_parquet_catalog_casting") is True + table=table, + description="c1", + parameters={"num_cols": str(len(df.columns)), "num_rows": str(len(df.index) * 2)}, + columns_comments={"c1": "1"}, + ) + df2 = wr.athena.read_sql_table(table, database) + assert len(df.columns) == len(df2.columns) + assert len(df.index) * 2 == len(df2.index) + assert df.c1.sum() + 1 == df2.c1.sum() + parameters = wr.catalog.get_table_parameters(database, table) + assert len(parameters) >= 5 + assert parameters["num_cols"] == str(len(df2.columns)) + assert parameters["num_rows"] == str(len(df2.index)) + assert wr.catalog.get_table_description(database, table) == "c1" + comments = wr.catalog.get_columns_comments(database, table) + assert len(comments) == len(df.columns) + assert comments["c1"] == "1" + # Round 4 - Append + New Column + df = pd.DataFrame({"c2": ["a", None, "b"], "c1": [None, 1, None]}) + df["c1"] = df["c1"].astype("Int16") + paths = wr.s3.to_parquet(df=df, path=path, dataset=True, mode="append")["paths"] + wr.s3.wait_objects_exist(paths=paths) + wr.s3.store_parquet_metadata( + path=path, + dataset=True, + mode="append", + database=database, + table=table, + description="c1+c2", + parameters={"num_cols": "2", "num_rows": "9"}, + columns_comments={"c1": "1", "c2": "2"}, + ) + df2 = wr.athena.read_sql_table(table, database) + assert len(df2.columns) == 2 + assert len(df2.index) == 9 + assert df2.c1.sum() == 4 + parameters = wr.catalog.get_table_parameters(database, table) + assert len(parameters) >= 5 + assert parameters["num_cols"] == "2" + 
+    assert parameters["num_rows"] == "9"
+    assert wr.catalog.get_table_description(database, table) == "c1+c2"
+    comments = wr.catalog.get_columns_comments(database, table)
+    assert len(comments) == len(df.columns)
+    assert comments["c1"] == "1"
+    assert comments["c2"] == "2"
-def test_catalog(path, database, table):
-    account_id = boto3.client("sts").get_caller_identity().get("Account")
-    assert wr.catalog.does_table_exist(database=database, table=table) is False
-    wr.catalog.create_parquet_table(
+    # Round 5 - Overwrite Partitioned
+    df = pd.DataFrame({"c0": ["foo", None], "c1": [0, 1]})
+    paths = wr.s3.to_parquet(df=df, path=path, dataset=True, mode="overwrite", partition_cols=["c1"])["paths"]
+    wr.s3.wait_objects_exist(paths=paths)
+    wr.s3.store_parquet_metadata(
+        path=path,
+        dataset=True,
+        mode="overwrite",
         database=database,
         table=table,
+        description="c0+c1",
+        parameters={"num_cols": "2", "num_rows": "2"},
+        columns_comments={"c0": "zero", "c1": "one"},
+    )
+    df2 = wr.athena.read_sql_table(table, database)
+    assert df.shape == df2.shape
+    assert df.c1.sum() == df2.c1.astype(int).sum()
+    parameters = wr.catalog.get_table_parameters(database, table)
+    assert len(parameters) >= 5
+    assert parameters["num_cols"] == "2"
+    assert parameters["num_rows"] == "2"
+    assert wr.catalog.get_table_description(database, table) == "c0+c1"
+    comments = wr.catalog.get_columns_comments(database, table)
+    assert len(comments) == len(df.columns)
+    assert comments["c0"] == "zero"
+    assert comments["c1"] == "one"
+
+    # Round 6 - Overwrite Partitions
+    df = pd.DataFrame({"c0": [None, "boo"], "c1": [0, 2]})
+    paths = wr.s3.to_parquet(df=df, path=path, dataset=True, mode="overwrite_partitions", partition_cols=["c1"])[
+        "paths"
+    ]
+    wr.s3.wait_objects_exist(paths=paths)
+    wr.s3.store_parquet_metadata(
         path=path,
-        columns_types={"col0": "int", "col1": "double"},
-        partitions_types={"y": "int", "m": "int"},
-        compression="snappy",
+        dataset=True,
+        mode="append",
+        database=database,
+        table=table,
+        description="c0+c1",
+        parameters={"num_cols": "2", "num_rows": "3"},
+        columns_comments={"c0": "zero", "c1": "one"},
     )
-    with pytest.raises(wr.exceptions.InvalidArgumentValue):
-        wr.catalog.create_parquet_table(
-            database=database, table=table, path=path, columns_types={"col0": "string"}, mode="append"
-        )
-    assert wr.catalog.does_table_exist(database=database, table=table) is True
-    assert wr.catalog.delete_table_if_exists(database=database, table=table) is True
-    assert wr.catalog.delete_table_if_exists(database=database, table=table) is False
-    wr.catalog.create_parquet_table(
+    df2 = wr.athena.read_sql_table(table, database)
+    assert len(df2.columns) == 2
+    assert len(df2.index) == 3
+    assert df2.c1.astype(int).sum() == 3
+    parameters = wr.catalog.get_table_parameters(database, table)
+    assert len(parameters) >= 5
+    assert parameters["num_cols"] == "2"
+    assert parameters["num_rows"] == "3"
+    assert wr.catalog.get_table_description(database, table) == "c0+c1"
+    comments = wr.catalog.get_columns_comments(database, table)
+    assert len(comments) == len(df.columns)
+    assert comments["c0"] == "zero"
+    assert comments["c1"] == "one"
+
+    # Round 7 - Overwrite Partitions + New Column
+    df = pd.DataFrame({"c0": ["bar", None], "c1": [1, 3], "c2": [True, False]})
+    paths = wr.s3.to_parquet(df=df, path=path, dataset=True, mode="overwrite_partitions", partition_cols=["c1"])[
+        "paths"
+    ]
+    wr.s3.wait_objects_exist(paths=paths)
+    wr.s3.store_parquet_metadata(
+        path=path,
+        dataset=True,
+        mode="append",
         database=database,
         table=table,
+        description="c0+c1+c2",
+        parameters={"num_cols": "3", "num_rows": "4"},
+        columns_comments={"c0": "zero", "c1": "one", "c2": "two"},
+    )
+    df2 = wr.athena.read_sql_table(table, database)
+    assert len(df2.columns) == 3
+    assert len(df2.index) == 4
+    assert df2.c1.astype(int).sum() == 6
+    parameters = wr.catalog.get_table_parameters(database, table)
+    assert len(parameters) >= 5
+    assert parameters["num_cols"] == "3"
+    assert parameters["num_rows"] == "4"
+    assert wr.catalog.get_table_description(database, table) == "c0+c1+c2"
+    comments = wr.catalog.get_columns_comments(database, table)
+    assert len(comments) == len(df.columns)
+    assert comments["c0"] == "zero"
+    assert comments["c1"] == "one"
+    assert comments["c2"] == "two"
+    engine = wr.catalog.get_engine("aws-data-wrangler-redshift")
+    df3 = wr.db.read_sql_table(con=engine, table=table, schema=external_schema)
+    assert len(df3.columns) == 3
+    assert len(df3.index) == 4
+    assert df3.c1.astype(int).sum() == 6
+
+
+def test_athena_ctas(path, path2, path3, table, table2, database, kms_key):
+    df = get_df_list()
+    columns_types, partitions_types = wr.catalog.extract_athena_types(df=df, partition_cols=["par0", "par1"])
+    assert len(columns_types) == 16
+    assert len(partitions_types) == 2
+    with pytest.raises(wr.exceptions.InvalidArgumentValue):
+        wr.catalog.extract_athena_types(df=df, file_format="avro")
+    paths = wr.s3.to_parquet(
+        df=get_df_list(),
         path=path,
-        columns_types={"col0": "int", "col1": "double"},
-        partitions_types={"y": "int", "m": "int"},
-        compression="snappy",
-        description="Foo boo bar",
-        parameters={"tag": "test"},
-        columns_comments={"col0": "my int", "y": "year"},
+        index=True,
+        use_threads=True,
+        dataset=True,
         mode="overwrite",
-    )
-    wr.catalog.add_parquet_partitions(
         database=database,
         table=table,
-        partitions_values={f"{path}y=2020/m=1/": ["2020", "1"], f"{path}y=2021/m=2/": ["2021", "2"]},
-        compression="snappy",
-    )
-    assert wr.catalog.get_table_location(database=database, table=table) == path
-    partitions_values = wr.catalog.get_parquet_partitions(database=database, table=table)
-    assert len(partitions_values) == 2
-    partitions_values = wr.catalog.get_parquet_partitions(
-        database=database, table=table, catalog_id=account_id, expression="y = 2021 AND m = 2"
+        partition_cols=["par0", "par1"],
+    )["paths"]
+    wr.s3.wait_objects_exist(paths=paths)
+    dirs = wr.s3.list_directories(path=path)
+    for d in dirs:
+        assert d.startswith(f"{path}par0=")
+    df = wr.s3.read_parquet_table(table=table, database=database)
+    assert len(df.index) == 3
+    ensure_data_types(df=df, has_list=True)
+    df = wr.athena.read_sql_table(
+        table=table,
+        database=database,
+        ctas_approach=True,
+        encryption="SSE_KMS",
+        kms_key=kms_key,
+        s3_output=path2,
+        keep_files=False,
     )
-    assert len(partitions_values) == 1
-    assert len(set(partitions_values[f"{path}y=2021/m=2/"]) & {"2021", "2"}) == 2
-    dtypes = wr.catalog.get_table_types(database=database, table=table)
-    assert dtypes["col0"] == "int"
-    assert dtypes["col1"] == "double"
-    assert dtypes["y"] == "int"
-    assert dtypes["m"] == "int"
-    df_dbs = wr.catalog.databases()
-    assert len(wr.catalog.databases(catalog_id=account_id)) == len(df_dbs)
-    assert database in df_dbs["Database"].to_list()
-    tables = list(wr.catalog.get_tables())
-    assert len(tables) > 0
-    for tbl in tables:
-        if tbl["Name"] == table:
-            assert tbl["TableType"] == "EXTERNAL_TABLE"
-    tables = list(wr.catalog.get_tables(database=database))
-    assert len(tables) > 0
-    for tbl in tables:
-        assert
tbl["DatabaseName"] == database - # search - tables = list(wr.catalog.search_tables(text="parquet", catalog_id=account_id)) - assert len(tables) > 0 - for tbl in tables: - if tbl["Name"] == table: - assert tbl["TableType"] == "EXTERNAL_TABLE" - # prefix - tables = list(wr.catalog.get_tables(name_prefix=table[:4], catalog_id=account_id)) - assert len(tables) > 0 - for tbl in tables: - if tbl["Name"] == table: - assert tbl["TableType"] == "EXTERNAL_TABLE" - # suffix - tables = list(wr.catalog.get_tables(name_suffix=table[-4:], catalog_id=account_id)) - assert len(tables) > 0 - for tbl in tables: - if tbl["Name"] == table: - assert tbl["TableType"] == "EXTERNAL_TABLE" - # name_contains - tables = list(wr.catalog.get_tables(name_contains=table[4:-4], catalog_id=account_id)) - assert len(tables) > 0 - for tbl in tables: - if tbl["Name"] == table: - assert tbl["TableType"] == "EXTERNAL_TABLE" - # prefix & suffix & name_contains - tables = list( - wr.catalog.get_tables( - name_prefix=table[0], name_contains=table[3], name_suffix=table[-1], catalog_id=account_id - ) - ) - assert len(tables) > 0 - for tbl in tables: - if tbl["Name"] == table: - assert tbl["TableType"] == "EXTERNAL_TABLE" - # prefix & suffix - tables = list(wr.catalog.get_tables(name_prefix=table[0], name_suffix=table[-1], catalog_id=account_id)) - assert len(tables) > 0 - for tbl in tables: - if tbl["Name"] == table: - assert tbl["TableType"] == "EXTERNAL_TABLE" - # DataFrames - assert len(wr.catalog.databases().index) > 0 - assert len(wr.catalog.tables().index) > 0 - assert ( - len( - wr.catalog.tables( - database=database, - search_text="parquet", - name_prefix=table[0], - name_contains=table[3], - name_suffix=table[-1], - catalog_id=account_id, - ).index - ) - > 0 - ) - assert len(wr.catalog.table(database=database, table=table).index) > 0 - assert len(wr.catalog.table(database=database, table=table, catalog_id=account_id).index) > 0 - with pytest.raises(wr.exceptions.InvalidTable): - wr.catalog.overwrite_table_parameters({"foo": "boo"}, database, "fake_table") - - -def test_s3_get_bucket_region(bucket, region): - assert wr.s3.get_bucket_region(bucket=bucket) == region - assert wr.s3.get_bucket_region(bucket=bucket, boto3_session=boto3.Session()) == region - - -def test_catalog_get_databases(database): - dbs = list(wr.catalog.get_databases()) - assert len(dbs) > 0 - for db in dbs: - if db["Name"] == database: - assert db["Description"] == "AWS Data Wrangler Test Arena - Glue Database" - - -def test_athena_query_cancelled(database): - session = boto3.Session() - query_execution_id = wr.athena.start_query_execution(sql=get_query_long(), database=database, boto3_session=session) - wr.athena.stop_query_execution(query_execution_id=query_execution_id, boto3_session=session) - with pytest.raises(wr.exceptions.QueryCancelled): - assert wr.athena.wait_query(query_execution_id=query_execution_id) - - -def test_athena_query_failed(database): - query_execution_id = wr.athena.start_query_execution(sql="SELECT random(-1)", database=database) - with pytest.raises(wr.exceptions.QueryFailed): - assert wr.athena.wait_query(query_execution_id=query_execution_id) - - -def test_athena_read_list(database): - with pytest.raises(wr.exceptions.UnsupportedType): - wr.athena.read_sql_query(sql="SELECT ARRAY[1, 2, 3]", database=database, ctas_approach=False) - - -def test_sanitize_names(): - assert wr.catalog.sanitize_column_name("CamelCase") == "camel_case" - assert wr.catalog.sanitize_column_name("CamelCase2") == "camel_case2" - assert 
wr.catalog.sanitize_column_name("Camel_Case3") == "camel_case3" - assert wr.catalog.sanitize_column_name("Cámël_Casë4仮") == "camel_case4_" - assert wr.catalog.sanitize_column_name("Camel__Case5") == "camel__case5" - assert wr.catalog.sanitize_column_name("Camel{}Case6") == "camel_case6" - assert wr.catalog.sanitize_column_name("Camel.Case7") == "camel_case7" - assert wr.catalog.sanitize_column_name("xyz_cd") == "xyz_cd" - assert wr.catalog.sanitize_column_name("xyz_Cd") == "xyz_cd" - assert wr.catalog.sanitize_table_name("CamelCase") == "camel_case" - assert wr.catalog.sanitize_table_name("CamelCase2") == "camel_case2" - assert wr.catalog.sanitize_table_name("Camel_Case3") == "camel_case3" - assert wr.catalog.sanitize_table_name("Cámël_Casë4仮") == "camel_case4_" - assert wr.catalog.sanitize_table_name("Camel__Case5") == "camel__case5" - assert wr.catalog.sanitize_table_name("Camel{}Case6") == "camel_case6" - assert wr.catalog.sanitize_table_name("Camel.Case7") == "camel_case7" - assert wr.catalog.sanitize_table_name("xyz_cd") == "xyz_cd" - assert wr.catalog.sanitize_table_name("xyz_Cd") == "xyz_cd" - - -def test_athena_ctas_empty(database): - sql = """ - WITH dataset AS ( - SELECT 0 AS id - ) - SELECT id - FROM dataset - WHERE id != 0 - """ - assert wr.athena.read_sql_query(sql=sql, database=database).empty is True - assert len(list(wr.athena.read_sql_query(sql=sql, database=database, chunksize=1))) == 0 - - -def test_s3_empty_dfs(): - df = pd.DataFrame() - with pytest.raises(wr.exceptions.EmptyDataFrame): - wr.s3.to_parquet(df=df, path="") - with pytest.raises(wr.exceptions.EmptyDataFrame): - wr.s3.to_csv(df=df, path="") - - -def test_absent_object(bucket): - path = f"s3://{bucket}/test_absent_object" - assert wr.s3.does_object_exist(path=path) is False - assert len(wr.s3.size_objects(path=path)) == 0 - assert wr.s3.wait_objects_exist(paths=[]) is None - - -def test_athena_struct(database): - sql = "SELECT CAST(ROW(1, 'foo') AS ROW(id BIGINT, value VARCHAR)) AS col0" - with pytest.raises(wr.exceptions.UnsupportedType): - wr.athena.read_sql_query(sql=sql, database=database, ctas_approach=False) - df = wr.athena.read_sql_query(sql=sql, database=database, ctas_approach=True) - assert len(df.index) == 1 - assert len(df.columns) == 1 - assert df["col0"].iloc[0]["id"] == 1 - assert df["col0"].iloc[0]["value"] == "foo" - sql = "SELECT ROW(1, ROW(2, ROW(3, '4'))) AS col0" - df = wr.athena.read_sql_query(sql=sql, database=database, ctas_approach=True) - assert len(df.index) == 1 - assert len(df.columns) == 1 - assert df["col0"].iloc[0]["field0"] == 1 - assert df["col0"].iloc[0]["field1"]["field0"] == 2 - assert df["col0"].iloc[0]["field1"]["field1"]["field0"] == 3 - assert df["col0"].iloc[0]["field1"]["field1"]["field1"] == "4" + assert len(df.index) == 3 + ensure_data_types(df=df, has_list=True) + final_destination = f"{path3}{table2}/" + # keep_files=False + wr.s3.delete_objects(path=path3) + dfs = wr.athena.read_sql_query( + sql=f"SELECT * FROM {table}", + database=database, + ctas_approach=True, + chunksize=1, + keep_files=False, + ctas_temp_table_name=table2, + s3_output=path3, + ) + assert wr.catalog.does_table_exist(database=database, table=table2) is False + assert len(wr.s3.list_objects(path=path3)) > 2 + assert len(wr.s3.list_objects(path=final_destination)) > 0 + for df in dfs: + ensure_data_types(df=df, has_list=True) + assert len(wr.s3.list_objects(path=path3)) == 0 -def test_athena_time_zone(database): - sql = "SELECT current_timestamp AS value, typeof(current_timestamp) AS type" - 
df = wr.athena.read_sql_query(sql=sql, database=database, ctas_approach=False) - assert len(df.index) == 1 - assert len(df.columns) == 2 - assert df["type"][0] == "timestamp with time zone" - assert df["value"][0].year == datetime.datetime.utcnow().year + # keep_files=True + wr.s3.delete_objects(path=path3) + dfs = wr.athena.read_sql_query( + sql=f"SELECT * FROM {table}", + database=database, + ctas_approach=True, + chunksize=2, + keep_files=True, + ctas_temp_table_name=table2, + s3_output=path3, + ) + assert wr.catalog.does_table_exist(database=database, table=table2) is False + assert len(wr.s3.list_objects(path=path3)) > 2 + assert len(wr.s3.list_objects(path=final_destination)) > 0 + for df in dfs: + ensure_data_types(df=df, has_list=True) + assert len(wr.s3.list_objects(path=path3)) > 2 -def test_category(bucket, database): - df = get_df_category() - path = f"s3://{bucket}/test_category/" +def test_athena(path, database, kms_key, workgroup0, workgroup1): + wr.catalog.delete_table_if_exists(database=database, table="__test_athena") paths = wr.s3.to_parquet( - df=df, + df=get_df(), path=path, + index=True, + use_threads=True, dataset=True, - database=database, - table="test_category", mode="overwrite", + database=database, + table="__test_athena", partition_cols=["par0", "par1"], )["paths"] wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df2 = wr.s3.read_parquet(path=path, dataset=True, categories=[c for c in df.columns if c not in ["par0", "par1"]]) - ensure_data_types_category(df2) - df2 = wr.athena.read_sql_query("SELECT * FROM test_category", database=database, categories=list(df.columns)) - ensure_data_types_category(df2) - df2 = wr.athena.read_sql_table(table="test_category", database=database, categories=list(df.columns)) - ensure_data_types_category(df2) - df2 = wr.athena.read_sql_query( - "SELECT * FROM test_category", database=database, categories=list(df.columns), ctas_approach=False - ) - ensure_data_types_category(df2) dfs = wr.athena.read_sql_query( - "SELECT * FROM test_category", database=database, categories=list(df.columns), ctas_approach=False, chunksize=1 + sql="SELECT * FROM __test_athena", + database=database, + ctas_approach=False, + chunksize=1, + encryption="SSE_KMS", + kms_key=kms_key, + workgroup=workgroup0, + keep_files=False, ) for df2 in dfs: - ensure_data_types_category(df2) - dfs = wr.athena.read_sql_query( - "SELECT * FROM test_category", database=database, categories=list(df.columns), ctas_approach=True, chunksize=1 + print(df2) + ensure_data_types(df=df2) + df = wr.athena.read_sql_query( + sql="SELECT * FROM __test_athena", + database=database, + ctas_approach=False, + workgroup=workgroup1, + keep_files=False, ) - for df2 in dfs: - ensure_data_types_category(df2) - wr.s3.delete_objects(path=paths) - assert wr.catalog.delete_table_if_exists(database=database, table="test_category") is True + assert len(df.index) == 3 + ensure_data_types(df=df) + wr.athena.repair_table(table="__test_athena", database=database) + wr.catalog.delete_table_if_exists(database=database, table="__test_athena") -def test_parquet_validate_schema(path): +def test_csv(bucket): + session = boto3.Session() df = pd.DataFrame({"id": [1, 2, 3]}) - path_file = f"{path}0.parquet" - wr.s3.to_parquet(df=df, path=path_file) - wr.s3.wait_objects_exist(paths=[path_file]) - df2 = pd.DataFrame({"id2": [1, 2, 3], "val": ["foo", "boo", "bar"]}) - path_file2 = f"{path}1.parquet" - wr.s3.to_parquet(df=df2, path=path_file2) - wr.s3.wait_objects_exist(paths=[path_file2], use_threads=False) 
- df3 = wr.s3.read_parquet(path=path, validate_schema=False) - assert len(df3.index) == 6 - assert len(df3.columns) == 3 - with pytest.raises(ValueError): - wr.s3.read_parquet(path=path, validate_schema=True) + path0 = f"s3://{bucket}/test_csv0.csv" + path1 = f"s3://{bucket}/test_csv1.csv" + path2 = f"s3://{bucket}/test_csv2.csv" + wr.s3.to_csv(df=df, path=path0, index=False) + wr.s3.wait_objects_exist(paths=[path0]) + assert wr.s3.does_object_exist(path=path0) is True + assert wr.s3.size_objects(path=[path0], use_threads=False)[path0] == 9 + assert wr.s3.size_objects(path=[path0], use_threads=True)[path0] == 9 + wr.s3.to_csv(df=df, path=path1, index=False, boto3_session=None) + wr.s3.to_csv(df=df, path=path2, index=False, boto3_session=session) + assert df.equals(wr.s3.read_csv(path=path0, use_threads=False)) + assert df.equals(wr.s3.read_csv(path=path0, use_threads=True)) + assert df.equals(wr.s3.read_csv(path=path0, use_threads=False, boto3_session=session)) + assert df.equals(wr.s3.read_csv(path=path0, use_threads=True, boto3_session=session)) + paths = [path0, path1, path2] + df2 = pd.concat(objs=[df, df, df], sort=False, ignore_index=True) + assert df2.equals(wr.s3.read_csv(path=paths, use_threads=False)) + assert df2.equals(wr.s3.read_csv(path=paths, use_threads=True)) + assert df2.equals(wr.s3.read_csv(path=paths, use_threads=False, boto3_session=session)) + assert df2.equals(wr.s3.read_csv(path=paths, use_threads=True, boto3_session=session)) + with pytest.raises(wr.exceptions.InvalidArgumentType): + wr.s3.read_csv(path=1) + with pytest.raises(wr.exceptions.InvalidArgument): + wr.s3.read_csv(path=paths, iterator=True) + wr.s3.delete_objects(path=paths, use_threads=False) + wr.s3.wait_objects_not_exist(paths=paths, use_threads=False) -def test_csv_dataset(bucket, database): - path = f"s3://{bucket}/test_csv_dataset/" - with pytest.raises(wr.exceptions.UndetectedType): - wr.s3.to_csv(pd.DataFrame({"A": [None]}), path, dataset=True, database=database, table="test_csv_dataset") - df = get_df_csv() +def test_json(bucket): + df0 = pd.DataFrame({"id": [1, 2, 3]}) + path0 = f"s3://{bucket}/test_json0.json" + path1 = f"s3://{bucket}/test_json1.json" + wr.s3.to_json(df=df0, path=path0) + wr.s3.to_json(df=df0, path=path1) + wr.s3.wait_objects_exist(paths=[path0, path1]) + assert df0.equals(wr.s3.read_json(path=path0, use_threads=False)) + df1 = pd.concat(objs=[df0, df0], sort=False, ignore_index=True) + assert df1.equals(wr.s3.read_json(path=[path0, path1], use_threads=True)) + wr.s3.delete_objects(path=[path0, path1], use_threads=False) + + +def test_fwf(path): + text = "1 Herfelingen27-12-18\n2 Lambusart14-06-18\n3Spormaggiore15-04-18" + client_s3 = boto3.client("s3") + path0 = f"{path}/0.txt" + bucket, key = wr._utils.parse_path(path0) + client_s3.put_object(Body=text, Bucket=bucket, Key=key) + path1 = f"{path}/1.txt" + bucket, key = wr._utils.parse_path(path1) + client_s3.put_object(Body=text, Bucket=bucket, Key=key) + wr.s3.wait_objects_exist(paths=[path0, path1]) + df = wr.s3.read_fwf(path=path0, use_threads=False, widths=[1, 12, 8], names=["id", "name", "date"]) + assert len(df.index) == 3 + assert len(df.columns) == 3 + df = wr.s3.read_fwf(path=[path0, path1], use_threads=True, widths=[1, 12, 8], names=["id", "name", "date"]) + assert len(df.index) == 6 + assert len(df.columns) == 3 + + +def test_parquet(bucket): + wr.s3.delete_objects(path=f"s3://{bucket}/test_parquet/") + df_file = pd.DataFrame({"id": [1, 2, 3]}) + path_file = f"s3://{bucket}/test_parquet/test_parquet_file.parquet" 
+ df_dataset = pd.DataFrame({"id": [1, 2, 3], "partition": ["A", "A", "B"]}) + df_dataset["partition"] = df_dataset["partition"].astype("category") + path_dataset = f"s3://{bucket}/test_parquet/test_parquet_dataset" with pytest.raises(wr.exceptions.InvalidArgumentCombination): - wr.s3.to_csv(df, path, dataset=False, mode="overwrite", database=database, table="test_csv_dataset") + wr.s3.to_parquet(df=df_file, path=path_file, mode="append") + with pytest.raises(wr.exceptions.InvalidCompression): + wr.s3.to_parquet(df=df_file, path=path_file, compression="WRONG") with pytest.raises(wr.exceptions.InvalidArgumentCombination): - wr.s3.to_csv(df, path, dataset=False, table="test_csv_dataset") + wr.s3.to_parquet(df=df_dataset, path=path_dataset, partition_cols=["col2"]) with pytest.raises(wr.exceptions.InvalidArgumentCombination): - wr.s3.to_csv(df, path, dataset=True, mode="overwrite", database=database) + wr.s3.to_parquet(df=df_dataset, path=path_dataset, description="foo") + with pytest.raises(wr.exceptions.InvalidArgumentValue): + wr.s3.to_parquet(df=df_dataset, path=path_dataset, partition_cols=["col2"], dataset=True, mode="WRONG") + paths = wr.s3.to_parquet(df=df_file, path=path_file)["paths"] + wr.s3.wait_objects_exist(paths=paths) + assert len(wr.s3.read_parquet(path=path_file, use_threads=True, boto3_session=None).index) == 3 + assert len(wr.s3.read_parquet(path=[path_file], use_threads=False, boto3_session=boto3.Session()).index) == 3 + paths = wr.s3.to_parquet(df=df_dataset, path=path_dataset, dataset=True)["paths"] + wr.s3.wait_objects_exist(paths=paths) + assert len(wr.s3.read_parquet(path=paths, dataset=True).index) == 3 + assert len(wr.s3.read_parquet(path=path_dataset, use_threads=True, boto3_session=boto3.Session()).index) == 3 + dataset_paths = wr.s3.to_parquet( + df=df_dataset, path=path_dataset, dataset=True, partition_cols=["partition"], mode="overwrite" + )["paths"] + wr.s3.wait_objects_exist(paths=dataset_paths) + assert len(wr.s3.read_parquet(path=path_dataset, use_threads=True, boto3_session=None).index) == 3 + assert len(wr.s3.read_parquet(path=dataset_paths, use_threads=True).index) == 3 + assert len(wr.s3.read_parquet(path=path_dataset, dataset=True, use_threads=True).index) == 3 + wr.s3.to_parquet(df=df_dataset, path=path_dataset, dataset=True, partition_cols=["partition"], mode="overwrite") + wr.s3.to_parquet( + df=df_dataset, path=path_dataset, dataset=True, partition_cols=["partition"], mode="overwrite_partitions" + ) + wr.s3.delete_objects(path=f"s3://{bucket}/test_parquet/") + + +def test_parquet_catalog(bucket, database): + with pytest.raises(wr.exceptions.UndetectedType): + wr.s3.to_parquet( + df=pd.DataFrame({"A": [None]}), + path=f"s3://{bucket}/test_parquet_catalog", + dataset=True, + database=database, + table="test_parquet_catalog", + ) + df = get_df_list() with pytest.raises(wr.exceptions.InvalidArgumentCombination): - wr.s3.to_csv(df=df, path=path, mode="append") + wr.s3.to_parquet( + df=df, + path=f"s3://{bucket}/test_parquet_catalog", + use_threads=True, + dataset=False, + mode="overwrite", + database=database, + table="test_parquet_catalog", + ) with pytest.raises(wr.exceptions.InvalidArgumentCombination): - wr.s3.to_csv(df=df, path=path, partition_cols=["col2"]) + wr.s3.to_parquet( + df=df, + path=f"s3://{bucket}/test_parquet_catalog", + use_threads=True, + dataset=False, + table="test_parquet_catalog", + ) with pytest.raises(wr.exceptions.InvalidArgumentCombination): - wr.s3.to_csv(df=df, path=path, description="foo") - with 
pytest.raises(wr.exceptions.InvalidArgumentValue): - wr.s3.to_csv(df=df, path=path, partition_cols=["col2"], dataset=True, mode="WRONG") - paths = wr.s3.to_csv( + wr.s3.to_parquet( + df=df, + path=f"s3://{bucket}/test_parquet_catalog", + use_threads=True, + dataset=True, + mode="overwrite", + database=database, + ) + wr.s3.to_parquet( df=df, - path=path, - sep="|", - index=False, + path=f"s3://{bucket}/test_parquet_catalog", use_threads=True, - boto3_session=None, - s3_additional_kwargs=None, dataset=True, - partition_cols=["par0", "par1"], mode="overwrite", - )["paths"] - wr.s3.wait_objects_exist(paths=paths) - df2 = wr.s3.read_csv(path=paths, sep="|", header=None) - assert len(df2.index) == 3 - assert len(df2.columns) == 8 - assert df2[0].sum() == 6 - wr.s3.delete_objects(path=paths) - - -def test_csv_catalog(bucket, database): - path = f"s3://{bucket}/test_csv_catalog/" - df = get_df_csv() - paths = wr.s3.to_csv( + database=database, + table="test_parquet_catalog", + ) + wr.s3.to_parquet( df=df, - path=path, - sep="\t", + path=f"s3://{bucket}/test_parquet_catalog2", index=True, use_threads=True, - boto3_session=None, - s3_additional_kwargs=None, - dataset=True, - partition_cols=["par0", "par1"], - mode="overwrite", - table="test_csv_catalog", - database=database, - )["paths"] - wr.s3.wait_objects_exist(paths=paths) - df2 = wr.athena.read_sql_table("test_csv_catalog", database) - assert len(df2.index) == 3 - assert len(df2.columns) == 11 - assert df2["id"].sum() == 6 - ensure_data_types_csv(df2) - wr.s3.delete_objects(path=paths) - assert wr.catalog.delete_table_if_exists(database=database, table="test_csv_catalog") is True - - -def test_csv_catalog_columns(bucket, database): - path = f"s3://{bucket}/test_csv_catalog_columns /" - paths = wr.s3.to_csv( - df=get_df_csv(), - path=path, - sep="|", - columns=["id", "date", "timestamp", "par0", "par1"], - index=False, - use_threads=False, - boto3_session=None, - s3_additional_kwargs=None, dataset=True, - partition_cols=["par0", "par1"], mode="overwrite", - table="test_csv_catalog_columns", - database=database, - )["paths"] - wr.s3.wait_objects_exist(paths=paths) - df2 = wr.athena.read_sql_table("test_csv_catalog_columns", database) - assert len(df2.index) == 3 - assert len(df2.columns) == 5 - assert df2["id"].sum() == 6 - ensure_data_types_csv(df2) - - paths = wr.s3.to_csv( - df=pd.DataFrame({"id": [4], "date": [None], "timestamp": [None], "par0": [1], "par1": ["a"]}), - path=path, - sep="|", - index=False, - use_threads=False, - boto3_session=None, - s3_additional_kwargs=None, - dataset=True, - partition_cols=["par0", "par1"], - mode="overwrite_partitions", - table="test_csv_catalog_columns", database=database, - )["paths"] - wr.s3.wait_objects_exist(paths=paths) - df2 = wr.athena.read_sql_table("test_csv_catalog_columns", database) - assert len(df2.index) == 3 - assert len(df2.columns) == 5 - assert df2["id"].sum() == 9 - ensure_data_types_csv(df2) - - wr.s3.delete_objects(path=path) - assert wr.catalog.delete_table_if_exists(database=database, table="test_csv_catalog_columns") is True - - -def test_athena_types(bucket, database): - path = f"s3://{bucket}/test_athena_types/" - df = get_df_csv() - paths = wr.s3.to_csv( - df=df, - path=path, - sep=",", - index=False, - use_threads=True, - boto3_session=None, - s3_additional_kwargs=None, - dataset=True, - partition_cols=["par0", "par1"], - mode="overwrite", - )["paths"] - wr.s3.wait_objects_exist(paths=paths) - columns_types, partitions_types = wr.catalog.extract_athena_types( - df=df, 
index=False, partition_cols=["par0", "par1"], file_format="csv" - ) - wr.catalog.create_csv_table( - table="test_athena_types", - database=database, - path=path, - partitions_types=partitions_types, - columns_types=columns_types, - ) - wr.catalog.create_csv_table( - database=database, table="test_athena_types", path=path, columns_types={"col0": "string"}, mode="append" - ) - wr.athena.repair_table("test_athena_types", database) - assert len(wr.catalog.get_csv_partitions(database, "test_athena_types")) == 3 - df2 = wr.athena.read_sql_table("test_athena_types", database) - assert len(df2.index) == 3 - assert len(df2.columns) == 10 - assert df2["id"].sum() == 6 - ensure_data_types_csv(df2) - wr.s3.delete_objects(path=paths) - assert wr.catalog.delete_table_if_exists(database=database, table="test_athena_types") is True - - -def test_parquet_catalog_columns(bucket, database): - path = f"s3://{bucket}/test_parquet_catalog_columns/" - paths = wr.s3.to_parquet( - df=get_df_csv()[["id", "date", "timestamp", "par0", "par1"]], - path=path, - index=False, - use_threads=False, - boto3_session=None, - s3_additional_kwargs=None, - dataset=True, - partition_cols=["par0", "par1"], - mode="overwrite", - table="test_parquet_catalog_columns", - database=database, - )["paths"] - wr.s3.wait_objects_exist(paths=paths) - df2 = wr.athena.read_sql_table("test_parquet_catalog_columns", database) - assert len(df2.index) == 3 - assert len(df2.columns) == 5 - assert df2["id"].sum() == 6 - ensure_data_types_csv(df2) - - paths = wr.s3.to_parquet( - df=pd.DataFrame({"id": [4], "date": [None], "timestamp": [None], "par0": [1], "par1": ["a"]}), - path=path, - index=False, - use_threads=False, - boto3_session=None, - s3_additional_kwargs=None, - dataset=True, - partition_cols=["par0", "par1"], - mode="overwrite_partitions", - table="test_parquet_catalog_columns", - database=database, - )["paths"] - wr.s3.wait_objects_exist(paths=paths) - df2 = wr.athena.read_sql_table("test_parquet_catalog_columns", database) - assert len(df2.index) == 3 - assert len(df2.columns) == 5 - assert df2["id"].sum() == 9 - ensure_data_types_csv(df2) - - wr.s3.delete_objects(path=path) - assert wr.catalog.delete_table_if_exists(database=database, table="test_parquet_catalog_columns") is True - - -@pytest.mark.parametrize("compression", [None, "gzip", "snappy"]) -def test_parquet_compress(bucket, database, compression): - path = f"s3://{bucket}/test_parquet_compress_{compression}/" - paths = wr.s3.to_parquet( - df=get_df(), - path=path, - compression=compression, - dataset=True, - database=database, - table=f"test_parquet_compress_{compression}", - mode="overwrite", - )["paths"] - wr.s3.wait_objects_exist(paths=paths) - df2 = wr.athena.read_sql_table(f"test_parquet_compress_{compression}", database) - ensure_data_types(df2) - df2 = wr.s3.read_parquet(path=path) - wr.s3.delete_objects(path=path) - assert wr.catalog.delete_table_if_exists(database=database, table=f"test_parquet_compress_{compression}") is True - ensure_data_types(df2) - - -@pytest.mark.parametrize("compression", ["gzip", "bz2", "xz"]) -def test_csv_compress(bucket, compression): - path = f"s3://{bucket}/test_csv_compress_{compression}/" - wr.s3.delete_objects(path=path) - df = get_df_csv() - if compression == "gzip": - buffer = BytesIO() - with gzip.GzipFile(mode="w", fileobj=buffer) as zipped_file: - df.to_csv(TextIOWrapper(zipped_file, "utf8"), index=False, header=None) - s3_resource = boto3.resource("s3") - s3_object = s3_resource.Object(bucket, 
f"test_csv_compress_{compression}/test.csv.gz") - s3_object.put(Body=buffer.getvalue()) - file_path = f"s3://{bucket}/test_csv_compress_{compression}/test.csv.gz" - elif compression == "bz2": - buffer = BytesIO() - with bz2.BZ2File(mode="w", filename=buffer) as zipped_file: - df.to_csv(TextIOWrapper(zipped_file, "utf8"), index=False, header=None) - s3_resource = boto3.resource("s3") - s3_object = s3_resource.Object(bucket, f"test_csv_compress_{compression}/test.csv.bz2") - s3_object.put(Body=buffer.getvalue()) - file_path = f"s3://{bucket}/test_csv_compress_{compression}/test.csv.bz2" - elif compression == "xz": - buffer = BytesIO() - with lzma.LZMAFile(mode="w", filename=buffer) as zipped_file: - df.to_csv(TextIOWrapper(zipped_file, "utf8"), index=False, header=None) - s3_resource = boto3.resource("s3") - s3_object = s3_resource.Object(bucket, f"test_csv_compress_{compression}/test.csv.xz") - s3_object.put(Body=buffer.getvalue()) - file_path = f"s3://{bucket}/test_csv_compress_{compression}/test.csv.xz" - else: - file_path = f"s3://{bucket}/test_csv_compress_{compression}/test.csv" - wr.s3.to_csv(df=df, path=file_path, index=False, header=None) - - wr.s3.wait_objects_exist(paths=[file_path]) - df2 = wr.s3.read_csv(path=[file_path], names=df.columns) - assert len(df2.index) == 3 - assert len(df2.columns) == 10 - dfs = wr.s3.read_csv(path=[file_path], names=df.columns, chunksize=1) - for df3 in dfs: - assert len(df3.columns) == 10 - wr.s3.delete_objects(path=path) - - -def test_parquet_char_length(path, database, table, external_schema): - df = pd.DataFrame( - {"id": [1, 2], "cchar": ["foo", "boo"], "date": [datetime.date(2020, 1, 1), datetime.date(2020, 1, 2)]} - ) - wr.s3.to_parquet( - df=df, - path=path, - dataset=True, - database=database, - table=table, - mode="overwrite", - partition_cols=["date"], - dtype={"cchar": "char(3)"}, + table="test_parquet_catalog2", + partition_cols=["iint8", "iint16"], ) - - df2 = wr.s3.read_parquet(path, dataset=True) - assert len(df2.index) == 2 - assert len(df2.columns) == 3 - assert df2.id.sum() == 3 - - df2 = wr.athena.read_sql_table(table=table, database=database) - assert len(df2.index) == 2 - assert len(df2.columns) == 3 - assert df2.id.sum() == 3 - - engine = wr.catalog.get_engine("aws-data-wrangler-redshift") - df2 = wr.db.read_sql_table(con=engine, table=table, schema=external_schema) - assert len(df2.index) == 2 - assert len(df2.columns) == 3 - assert df2.id.sum() == 3 - - -def test_merge(bucket): - path = f"s3://{bucket}/test_merge/" - df = pd.DataFrame({"id": [1, 2, 3], "par": [1, 2, 3]}) - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, partition_cols=["par"], mode="overwrite")["paths"] - wr.s3.wait_objects_exist(paths=paths) - df = wr.s3.read_parquet(path=path, dataset=True) - assert df.id.sum() == 6 - assert df.par.astype("Int64").sum() == 6 - - path2 = f"s3://{bucket}/test_merge2/" - df = pd.DataFrame({"id": [1, 2, 3], "par": [1, 2, 3]}) - paths = wr.s3.to_parquet(df=df, path=path2, dataset=True, partition_cols=["par"], mode="overwrite")["paths"] - wr.s3.wait_objects_exist(paths=paths) - paths = wr.s3.merge_datasets(source_path=path2, target_path=path, mode="append", use_threads=True) - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df = wr.s3.read_parquet(path=path, dataset=True) - assert df.id.sum() == 12 - assert df.par.astype("Int64").sum() == 12 - - paths = wr.s3.merge_datasets(source_path=path2, target_path=path, mode="overwrite", use_threads=False) - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df 
= wr.s3.read_parquet(path=path, dataset=True) - assert df.id.sum() == 6 - assert df.par.astype("Int64").sum() == 6 - - df = pd.DataFrame({"id": [4], "par": [3]}) - paths = wr.s3.to_parquet(df=df, path=path2, dataset=True, partition_cols=["par"], mode="overwrite")["paths"] - wr.s3.wait_objects_exist(paths=paths) - paths = wr.s3.merge_datasets(source_path=path2, target_path=path, mode="overwrite_partitions", use_threads=True) - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df = wr.s3.read_parquet(path=path, dataset=True) - assert df.id.sum() == 7 - assert df.par.astype("Int64").sum() == 6 - - with pytest.raises(wr.exceptions.InvalidArgumentValue): - wr.s3.merge_datasets(source_path=path, target_path="bar", mode="WRONG") - - assert len(wr.s3.merge_datasets(source_path=f"s3://{bucket}/empty/", target_path="bar")) == 0 - - wr.s3.delete_objects(path=path) - wr.s3.delete_objects(path=path2) - - -def test_copy(bucket): - path = f"s3://{bucket}/test_copy/" - df = pd.DataFrame({"id": [1, 2, 3], "par": [1, 2, 3]}) - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, partition_cols=["par"], mode="overwrite")["paths"] - wr.s3.wait_objects_exist(paths=paths) - df = wr.s3.read_parquet(path=path, dataset=True) - assert df.id.sum() == 6 - assert df.par.astype("Int64").sum() == 6 - - path2 = f"s3://{bucket}/test_copy2/" - df = pd.DataFrame({"id": [1, 2, 3], "par": [1, 2, 3]}) - paths = wr.s3.to_parquet(df=df, path=path2, dataset=True, partition_cols=["par"], mode="overwrite")["paths"] - wr.s3.wait_objects_exist(paths=paths) - paths = wr.s3.copy_objects(paths, source_path=path2, target_path=path, use_threads=True) - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df = wr.s3.read_parquet(path=path, dataset=True) - assert df.id.sum() == 12 - assert df.par.astype("Int64").sum() == 12 - - assert len(wr.s3.copy_objects([], source_path="boo", target_path="bar")) == 0 - - wr.s3.delete_objects(path=path) - wr.s3.delete_objects(path=path2) - - -@pytest.mark.parametrize("col2", [[1, 1, 1, 1, 1], [1, 2, 3, 4, 5], [1, 1, 1, 1, 2], [1, 2, 2, 2, 2]]) -@pytest.mark.parametrize("chunked", [True, 1, 2, 100]) -def test_parquet_chunked(bucket, database, col2, chunked): - table = f"test_parquet_chunked_{chunked}_{''.join([str(x) for x in col2])}" - path = f"s3://{bucket}/{table}/" - wr.s3.delete_objects(path=path) - values = list(range(5)) - df = pd.DataFrame({"col1": values, "col2": col2}) - paths = wr.s3.to_parquet( - df, path, index=False, dataset=True, database=database, table=table, partition_cols=["col2"], mode="overwrite" - )["paths"] - wr.s3.wait_objects_exist(paths=paths) - - dfs = list(wr.s3.read_parquet(path=path, dataset=True, chunked=chunked)) - assert sum(values) == pd.concat(dfs, ignore_index=True).col1.sum() - if chunked is not True: - assert len(dfs) == int(math.ceil(len(df) / chunked)) - for df2 in dfs[:-1]: - assert chunked == len(df2) - assert chunked >= len(dfs[-1]) - else: - assert len(dfs) == len(set(col2)) - - dfs = list(wr.athena.read_sql_table(database=database, table=table, chunksize=chunked)) - assert sum(values) == pd.concat(dfs, ignore_index=True).col1.sum() - if chunked is not True: - assert len(dfs) == int(math.ceil(len(df) / chunked)) - for df2 in dfs[:-1]: - assert chunked == len(df2) - assert chunked >= len(dfs[-1]) - - wr.s3.delete_objects(path=paths) - assert wr.catalog.delete_table_if_exists(database=database, table=table) is True - - -@pytest.mark.parametrize("workgroup", [None, 0, 1, 2, 3]) -@pytest.mark.parametrize("encryption", [None, "SSE_S3", "SSE_KMS"]) 
-# @pytest.mark.parametrize("workgroup", [3]) -# @pytest.mark.parametrize("encryption", [None]) -def test_athena_encryption( - path, path2, database, table, table2, kms_key, encryption, workgroup, workgroup0, workgroup1, workgroup2, workgroup3 -): - kms_key = None if (encryption == "SSE_S3") or (encryption is None) else kms_key - if workgroup == 0: - workgroup = workgroup0 - elif workgroup == 1: - workgroup = workgroup1 - elif workgroup == 2: - workgroup = workgroup2 - elif workgroup == 3: - workgroup = workgroup3 - df = pd.DataFrame({"a": [1, 2], "b": ["foo", "boo"]}) - paths = wr.s3.to_parquet( - df=df, path=path, dataset=True, mode="overwrite", database=database, table=table, s3_additional_kwargs=None - )["paths"] - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df2 = wr.athena.read_sql_table( - table=table, - ctas_approach=True, - database=database, - encryption=encryption, - workgroup=workgroup, - kms_key=kms_key, - keep_files=True, - ctas_temp_table_name=table2, - s3_output=path2, + columns_types, partitions_types = wr.s3.read_parquet_metadata( + path=f"s3://{bucket}/test_parquet_catalog2", dataset=True ) - assert wr.catalog.does_table_exist(database=database, table=table2) is False - assert len(df2.index) == 2 - assert len(df2.columns) == 2 - - -def test_athena_nested(path, database, table): - df = pd.DataFrame( - { - "c0": [[1, 2, 3], [4, 5, 6]], - "c1": [[[1, 2], [3, 4]], [[5, 6], [7, 8]]], - "c2": [[["a", "b"], ["c", "d"]], [["e", "f"], ["g", "h"]]], - "c3": [[], [[[[[[[[1]]]]]]]]], - "c4": [{"a": 1}, {"a": 1}], - "c5": [{"a": {"b": {"c": [1, 2]}}}, {"a": {"b": {"c": [3, 4]}}}], - } + assert len(columns_types) == 17 + assert len(partitions_types) == 2 + columns_types, partitions_types, partitions_values = wr.s3.store_parquet_metadata( + path=f"s3://{bucket}/test_parquet_catalog2", database=database, table="test_parquet_catalog2", dataset=True ) - paths = wr.s3.to_parquet( - df=df, path=path, index=False, use_threads=True, dataset=True, mode="overwrite", database=database, table=table - )["paths"] - wr.s3.wait_objects_exist(paths=paths) - df2 = wr.athena.read_sql_query(sql=f"SELECT c0, c1, c2, c4 FROM {table}", database=database) - assert len(df2.index) == 2 - assert len(df2.columns) == 4 - - -def test_catalog_versioning(bucket, database): - table = "test_catalog_versioning" - wr.catalog.delete_table_if_exists(database=database, table=table) - path = f"s3://{bucket}/{table}/" - wr.s3.delete_objects(path=path) - - # Version 0 - df = pd.DataFrame({"c0": [1, 2]}) - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, database=database, table=table, mode="overwrite")["paths"] - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df = wr.athena.read_sql_table(table=table, database=database) - assert len(df.index) == 2 - assert len(df.columns) == 1 - assert str(df.c0.dtype).startswith("Int") - - # Version 1 - df = pd.DataFrame({"c1": ["foo", "boo"]}) - paths1 = wr.s3.to_parquet( - df=df, path=path, dataset=True, database=database, table=table, mode="overwrite", catalog_versioning=True - )["paths"] - wr.s3.wait_objects_exist(paths=paths1, use_threads=False) - df = wr.athena.read_sql_table(table=table, database=database) - assert len(df.index) == 2 - assert len(df.columns) == 1 - assert str(df.c1.dtype) == "string" + assert len(columns_types) == 17 + assert len(partitions_types) == 2 + assert len(partitions_values) == 2 + wr.s3.delete_objects(path=f"s3://{bucket}/test_parquet_catalog/") + wr.s3.delete_objects(path=f"s3://{bucket}/test_parquet_catalog2/") + assert 
wr.catalog.delete_table_if_exists(database=database, table="test_parquet_catalog") is True + assert wr.catalog.delete_table_if_exists(database=database, table="test_parquet_catalog2") is True - # Version 2 - df = pd.DataFrame({"c1": [1.0, 2.0]}) - paths2 = wr.s3.to_csv( + +def test_parquet_catalog_duplicated(bucket, database): + path = f"s3://{bucket}/test_parquet_catalog_dedup/" + df = pd.DataFrame({"A": [1], "a": [1]}) + wr.s3.to_parquet( df=df, path=path, + index=False, dataset=True, - database=database, - table=table, mode="overwrite", - catalog_versioning=True, - index=False, - )["paths"] - wr.s3.wait_objects_exist(paths=paths2, use_threads=False) - wr.s3.wait_objects_not_exist(paths=paths1, use_threads=False) - df = wr.athena.read_sql_table(table=table, database=database) - assert len(df.index) == 2 + database=database, + table="test_parquet_catalog_dedup", + ) + df = wr.s3.read_parquet(path=path) + assert len(df.index) == 1 assert len(df.columns) == 1 - assert str(df.c1.dtype).startswith("float") + wr.s3.delete_objects(path=path) + assert wr.catalog.delete_table_if_exists(database=database, table="test_parquet_catalog_dedup") is True - # Version 3 (removing version 2) - df = pd.DataFrame({"c1": [True, False]}) - paths3 = wr.s3.to_csv( - df=df, + +def test_parquet_catalog_casting(bucket, database): + path = f"s3://{bucket}/test_parquet_catalog_casting/" + paths = wr.s3.to_parquet( + df=get_df_cast(), path=path, + index=False, dataset=True, - database=database, - table=table, mode="overwrite", - catalog_versioning=False, - index=False, + database=database, + table="__test_parquet_catalog_casting", + dtype={ + "iint8": "tinyint", + "iint16": "smallint", + "iint32": "int", + "iint64": "bigint", + "float": "float", + "double": "double", + "decimal": "decimal(3,2)", + "string": "string", + "date": "date", + "timestamp": "timestamp", + "bool": "boolean", + "binary": "binary", + "category": "double", + "par0": "bigint", + "par1": "string", + }, )["paths"] - wr.s3.wait_objects_exist(paths=paths3, use_threads=False) - wr.s3.wait_objects_not_exist(paths=paths2, use_threads=False) - df = wr.athena.read_sql_table(table=table, database=database) - assert len(df.index) == 2 - assert len(df.columns) == 1 - assert str(df.c1.dtype).startswith("boolean") - - # Cleaning Up - wr.catalog.delete_table_if_exists(database=database, table=table) + wr.s3.wait_objects_exist(paths=paths) + df = wr.s3.read_parquet(path=path) + assert len(df.index) == 3 + assert len(df.columns) == 15 + ensure_data_types(df=df, has_list=False) + df = wr.athena.read_sql_table(table="__test_parquet_catalog_casting", database=database, ctas_approach=True) + assert len(df.index) == 3 + assert len(df.columns) == 15 + ensure_data_types(df=df, has_list=False) + df = wr.athena.read_sql_table(table="__test_parquet_catalog_casting", database=database, ctas_approach=False) + assert len(df.index) == 3 + assert len(df.columns) == 15 + ensure_data_types(df=df, has_list=False) wr.s3.delete_objects(path=path) + assert wr.catalog.delete_table_if_exists(database=database, table="__test_parquet_catalog_casting") is True -def test_copy_replacing_filename(bucket): - path = f"s3://{bucket}/test_copy_replacing_filename/" - wr.s3.delete_objects(path=path) - df = pd.DataFrame({"c0": [1, 2]}) - file_path = f"{path}myfile.parquet" - wr.s3.to_parquet(df=df, path=file_path) - wr.s3.wait_objects_exist(paths=[file_path], use_threads=False) - path2 = f"s3://{bucket}/test_copy_replacing_filename2/" - wr.s3.copy_objects( - paths=[file_path], source_path=path, 
target_path=path2, replace_filenames={"myfile.parquet": "myfile2.parquet"} +def test_catalog(path, database, table): + account_id = boto3.client("sts").get_caller_identity().get("Account") + assert wr.catalog.does_table_exist(database=database, table=table) is False + wr.catalog.create_parquet_table( + database=database, + table=table, + path=path, + columns_types={"col0": "int", "col1": "double"}, + partitions_types={"y": "int", "m": "int"}, + compression="snappy", ) - expected_file = f"{path2}myfile2.parquet" - wr.s3.wait_objects_exist(paths=[expected_file], use_threads=False) - objs = wr.s3.list_objects(path=path2) - assert objs[0] == expected_file - wr.s3.delete_objects(path=path) - wr.s3.delete_objects(path=path2) - - -def test_unsigned_parquet(bucket, database, external_schema): - table = "test_unsigned_parquet" - path = f"s3://{bucket}/{table}/" - wr.s3.delete_objects(path=path) - df = pd.DataFrame({"c0": [0, 0, (2 ** 8) - 1], "c1": [0, 0, (2 ** 16) - 1], "c2": [0, 0, (2 ** 32) - 1]}) - df["c0"] = df.c0.astype("uint8") - df["c1"] = df.c1.astype("uint16") - df["c2"] = df.c2.astype("uint32") - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, database=database, table=table, mode="overwrite")["paths"] - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df = wr.athena.read_sql_table(table=table, database=database) - assert df.c0.sum() == (2 ** 8) - 1 - assert df.c1.sum() == (2 ** 16) - 1 - assert df.c2.sum() == (2 ** 32) - 1 - schema = wr.s3.read_parquet_metadata(path=path)[0] - assert schema["c0"] == "smallint" - assert schema["c1"] == "int" - assert schema["c2"] == "bigint" - df = wr.s3.read_parquet(path=path) - assert df.c0.sum() == (2 ** 8) - 1 - assert df.c1.sum() == (2 ** 16) - 1 - assert df.c2.sum() == (2 ** 32) - 1 - engine = wr.catalog.get_engine("aws-data-wrangler-redshift") - df = wr.db.read_sql_table(con=engine, table=table, schema=external_schema) - assert df.c0.sum() == (2 ** 8) - 1 - assert df.c1.sum() == (2 ** 16) - 1 - assert df.c2.sum() == (2 ** 32) - 1 - - df = pd.DataFrame({"c0": [0, 0, (2 ** 64) - 1]}) - df["c0"] = df.c0.astype("uint64") - with pytest.raises(wr.exceptions.UnsupportedType): - wr.s3.to_parquet(df=df, path=path, dataset=True, database=database, table=table, mode="overwrite") - - wr.s3.delete_objects(path=path) - wr.catalog.delete_table_if_exists(database=database, table=table) + with pytest.raises(wr.exceptions.InvalidArgumentValue): + wr.catalog.create_parquet_table( + database=database, table=table, path=path, columns_types={"col0": "string"}, mode="append" + ) + assert wr.catalog.does_table_exist(database=database, table=table) is True + assert wr.catalog.delete_table_if_exists(database=database, table=table) is True + assert wr.catalog.delete_table_if_exists(database=database, table=table) is False + wr.catalog.create_parquet_table( + database=database, + table=table, + path=path, + columns_types={"col0": "int", "col1": "double"}, + partitions_types={"y": "int", "m": "int"}, + compression="snappy", + description="Foo boo bar", + parameters={"tag": "test"}, + columns_comments={"col0": "my int", "y": "year"}, + mode="overwrite", + ) + wr.catalog.add_parquet_partitions( + database=database, + table=table, + partitions_values={f"{path}y=2020/m=1/": ["2020", "1"], f"{path}y=2021/m=2/": ["2021", "2"]}, + compression="snappy", + ) + assert wr.catalog.get_table_location(database=database, table=table) == path + partitions_values = wr.catalog.get_parquet_partitions(database=database, table=table) + assert len(partitions_values) == 2 + 
partitions_values = wr.catalog.get_parquet_partitions( + database=database, table=table, catalog_id=account_id, expression="y = 2021 AND m = 2" + ) + assert len(partitions_values) == 1 + assert len(set(partitions_values[f"{path}y=2021/m=2/"]) & {"2021", "2"}) == 2 + dtypes = wr.catalog.get_table_types(database=database, table=table) + assert dtypes["col0"] == "int" + assert dtypes["col1"] == "double" + assert dtypes["y"] == "int" + assert dtypes["m"] == "int" + df_dbs = wr.catalog.databases() + assert len(wr.catalog.databases(catalog_id=account_id)) == len(df_dbs) + assert database in df_dbs["Database"].to_list() + tables = list(wr.catalog.get_tables()) + assert len(tables) > 0 + for tbl in tables: + if tbl["Name"] == table: + assert tbl["TableType"] == "EXTERNAL_TABLE" + tables = list(wr.catalog.get_tables(database=database)) + assert len(tables) > 0 + for tbl in tables: + assert tbl["DatabaseName"] == database + # search + tables = list(wr.catalog.search_tables(text="parquet", catalog_id=account_id)) + assert len(tables) > 0 + for tbl in tables: + if tbl["Name"] == table: + assert tbl["TableType"] == "EXTERNAL_TABLE" + # prefix + tables = list(wr.catalog.get_tables(name_prefix=table[:4], catalog_id=account_id)) + assert len(tables) > 0 + for tbl in tables: + if tbl["Name"] == table: + assert tbl["TableType"] == "EXTERNAL_TABLE" + # suffix + tables = list(wr.catalog.get_tables(name_suffix=table[-4:], catalog_id=account_id)) + assert len(tables) > 0 + for tbl in tables: + if tbl["Name"] == table: + assert tbl["TableType"] == "EXTERNAL_TABLE" + # name_contains + tables = list(wr.catalog.get_tables(name_contains=table[4:-4], catalog_id=account_id)) + assert len(tables) > 0 + for tbl in tables: + if tbl["Name"] == table: + assert tbl["TableType"] == "EXTERNAL_TABLE" + # prefix & suffix & name_contains + tables = list( + wr.catalog.get_tables( + name_prefix=table[0], name_contains=table[3], name_suffix=table[-1], catalog_id=account_id + ) + ) + assert len(tables) > 0 + for tbl in tables: + if tbl["Name"] == table: + assert tbl["TableType"] == "EXTERNAL_TABLE" + # prefix & suffix + tables = list(wr.catalog.get_tables(name_prefix=table[0], name_suffix=table[-1], catalog_id=account_id)) + assert len(tables) > 0 + for tbl in tables: + if tbl["Name"] == table: + assert tbl["TableType"] == "EXTERNAL_TABLE" + # DataFrames + assert len(wr.catalog.databases().index) > 0 + assert len(wr.catalog.tables().index) > 0 + assert ( + len( + wr.catalog.tables( + database=database, + search_text="parquet", + name_prefix=table[0], + name_contains=table[3], + name_suffix=table[-1], + catalog_id=account_id, + ).index + ) + > 0 + ) + assert len(wr.catalog.table(database=database, table=table).index) > 0 + assert len(wr.catalog.table(database=database, table=table, catalog_id=account_id).index) > 0 + with pytest.raises(wr.exceptions.InvalidTable): + wr.catalog.overwrite_table_parameters({"foo": "boo"}, database, "fake_table") -def test_parquet_uint64(bucket): - path = f"s3://{bucket}/test_parquet_uint64/" - wr.s3.delete_objects(path=path) - df = pd.DataFrame( - { - "c0": [0, 0, (2 ** 8) - 1], - "c1": [0, 0, (2 ** 16) - 1], - "c2": [0, 0, (2 ** 32) - 1], - "c3": [0, 0, (2 ** 64) - 1], - "c4": [0, 1, 2], - } - ) - print(df) - df["c0"] = df.c0.astype("uint8") - df["c1"] = df.c1.astype("uint16") - df["c2"] = df.c2.astype("uint32") - df["c3"] = df.c3.astype("uint64") - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, mode="overwrite", partition_cols=["c4"])["paths"] - wr.s3.wait_objects_exist(paths=paths, 
use_threads=False) - df = wr.s3.read_parquet(path=path, dataset=True) - print(df) - print(df.dtypes) - assert len(df.index) == 3 - assert len(df.columns) == 5 - assert df.c0.max() == (2 ** 8) - 1 - assert df.c1.max() == (2 ** 16) - 1 - assert df.c2.max() == (2 ** 32) - 1 - assert df.c3.max() == (2 ** 64) - 1 - assert df.c4.astype("uint8").sum() == 3 - wr.s3.delete_objects(path=path) +def test_s3_get_bucket_region(bucket, region): + assert wr.s3.get_bucket_region(bucket=bucket) == region + assert wr.s3.get_bucket_region(bucket=bucket, boto3_session=boto3.Session()) == region -def test_parquet_overwrite_partition_cols(path, database, table, external_schema): - df = pd.DataFrame({"c0": [1, 2, 1, 2], "c1": [1, 2, 1, 2], "c2": [2, 1, 2, 1]}) +def test_catalog_get_databases(database): + dbs = list(wr.catalog.get_databases()) + assert len(dbs) > 0 + for db in dbs: + if db["Name"] == database: + assert db["Description"] == "AWS Data Wrangler Test Arena - Glue Database" - paths = wr.s3.to_parquet( - df=df, path=path, dataset=True, database=database, table=table, mode="overwrite", partition_cols=["c2"] - )["paths"] - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df = wr.athena.read_sql_table(table=table, database=database) - assert len(df.index) == 4 - assert len(df.columns) == 3 - assert df.c0.sum() == 6 - assert df.c1.sum() == 6 - assert df.c2.sum() == 6 - paths = wr.s3.to_parquet( - df=df, path=path, dataset=True, database=database, table=table, mode="overwrite", partition_cols=["c1", "c2"] - )["paths"] - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df = wr.athena.read_sql_table(table=table, database=database) - assert len(df.index) == 4 - assert len(df.columns) == 3 - assert df.c0.sum() == 6 - assert df.c1.sum() == 6 - assert df.c2.sum() == 6 +def test_athena_query_cancelled(database): + session = boto3.Session() + query_execution_id = wr.athena.start_query_execution(sql=get_query_long(), database=database, boto3_session=session) + wr.athena.stop_query_execution(query_execution_id=query_execution_id, boto3_session=session) + with pytest.raises(wr.exceptions.QueryCancelled): + assert wr.athena.wait_query(query_execution_id=query_execution_id) - engine = wr.catalog.get_engine("aws-data-wrangler-redshift") - df = wr.db.read_sql_table(con=engine, table=table, schema=external_schema) - assert len(df.index) == 4 - assert len(df.columns) == 3 - assert df.c0.sum() == 6 - assert df.c1.sum() == 6 - assert df.c2.sum() == 6 +def test_athena_query_failed(database): + query_execution_id = wr.athena.start_query_execution(sql="SELECT random(-1)", database=database) + with pytest.raises(wr.exceptions.QueryFailed): + assert wr.athena.wait_query(query_execution_id=query_execution_id) -def test_catalog_parameters(bucket, database): - table = "test_catalog_parameters" - path = f"s3://{bucket}/{table}/" - wr.s3.delete_objects(path=path) - wr.catalog.delete_table_if_exists(database=database, table=table) - wr.s3.to_parquet( - df=pd.DataFrame({"c0": [1, 2]}), - path=path, - dataset=True, - database=database, - table=table, - mode="overwrite", - parameters={"a": "1", "b": "2"}, - ) - pars = wr.catalog.get_table_parameters(database=database, table=table) - assert pars["a"] == "1" - assert pars["b"] == "2" - pars["a"] = "0" - pars["c"] = "3" - wr.catalog.upsert_table_parameters(parameters=pars, database=database, table=table) - pars = wr.catalog.get_table_parameters(database=database, table=table) - assert pars["a"] == "0" - assert pars["b"] == "2" - assert pars["c"] == "3" - 
wr.catalog.overwrite_table_parameters(parameters={"d": "4"}, database=database, table=table) - pars = wr.catalog.get_table_parameters(database=database, table=table) - assert pars.get("a") is None - assert pars.get("b") is None - assert pars.get("c") is None - assert pars["d"] == "4" - df = wr.athena.read_sql_table(table=table, database=database) - assert len(df.index) == 2 - assert len(df.columns) == 1 - assert df.c0.sum() == 3 +def test_athena_read_list(database): + with pytest.raises(wr.exceptions.UnsupportedType): + wr.athena.read_sql_query(sql="SELECT ARRAY[1, 2, 3]", database=database, ctas_approach=False) - wr.s3.to_parquet( - df=pd.DataFrame({"c0": [3, 4]}), - path=path, - dataset=True, - database=database, - table=table, - mode="append", - parameters={"e": "5"}, - ) - pars = wr.catalog.get_table_parameters(database=database, table=table) - assert pars.get("a") is None - assert pars.get("b") is None - assert pars.get("c") is None - assert pars["d"] == "4" - assert pars["e"] == "5" - df = wr.athena.read_sql_table(table=table, database=database) - assert len(df.index) == 4 - assert len(df.columns) == 1 - assert df.c0.sum() == 10 - wr.s3.delete_objects(path=path) - wr.catalog.delete_table_if_exists(database=database, table=table) +def test_sanitize_names(): + assert wr.catalog.sanitize_column_name("CamelCase") == "camel_case" + assert wr.catalog.sanitize_column_name("CamelCase2") == "camel_case2" + assert wr.catalog.sanitize_column_name("Camel_Case3") == "camel_case3" + assert wr.catalog.sanitize_column_name("Cámël_Casë4仮") == "camel_case4_" + assert wr.catalog.sanitize_column_name("Camel__Case5") == "camel__case5" + assert wr.catalog.sanitize_column_name("Camel{}Case6") == "camel_case6" + assert wr.catalog.sanitize_column_name("Camel.Case7") == "camel_case7" + assert wr.catalog.sanitize_column_name("xyz_cd") == "xyz_cd" + assert wr.catalog.sanitize_column_name("xyz_Cd") == "xyz_cd" + assert wr.catalog.sanitize_table_name("CamelCase") == "camel_case" + assert wr.catalog.sanitize_table_name("CamelCase2") == "camel_case2" + assert wr.catalog.sanitize_table_name("Camel_Case3") == "camel_case3" + assert wr.catalog.sanitize_table_name("Cámël_Casë4仮") == "camel_case4_" + assert wr.catalog.sanitize_table_name("Camel__Case5") == "camel__case5" + assert wr.catalog.sanitize_table_name("Camel{}Case6") == "camel_case6" + assert wr.catalog.sanitize_table_name("Camel.Case7") == "camel_case7" + assert wr.catalog.sanitize_table_name("xyz_cd") == "xyz_cd" + assert wr.catalog.sanitize_table_name("xyz_Cd") == "xyz_cd" -def test_metadata_partitions(path): - path = f"{path}0.parquet" - df = pd.DataFrame({"c0": [0, 1, 2], "c1": ["3", "4", "5"], "c2": [6.0, 7.0, 8.0]}) - paths = wr.s3.to_parquet(df=df, path=path, dataset=False)["paths"] - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - columns_types, partitions_types = wr.s3.read_parquet_metadata(path=path, dataset=False) - assert len(columns_types) == len(df.columns) - assert columns_types.get("c0") == "bigint" - assert columns_types.get("c1") == "string" - assert columns_types.get("c2") == "double" +def test_athena_ctas_empty(database): + sql = """ + WITH dataset AS ( + SELECT 0 AS id + ) + SELECT id + FROM dataset + WHERE id != 0 + """ + assert wr.athena.read_sql_query(sql=sql, database=database).empty is True + assert len(list(wr.athena.read_sql_query(sql=sql, database=database, chunksize=1))) == 0 -@pytest.mark.parametrize("partition_cols", [None, ["c2"], ["c1", "c2"]]) -def test_metadata_partitions_dataset(path, partition_cols): - df = 
pd.DataFrame({"c0": [0, 1, 2], "c1": [3, 4, 5], "c2": [6, 7, 8]}) - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, partition_cols=partition_cols)["paths"] - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - columns_types, partitions_types = wr.s3.read_parquet_metadata(path=path, dataset=True) - partitions_types = partitions_types if partitions_types is not None else {} - assert len(columns_types) + len(partitions_types) == len(df.columns) - assert columns_types.get("c0") == "bigint" - assert (columns_types.get("c1") == "bigint") or (partitions_types.get("c1") == "string") - assert (columns_types.get("c1") == "bigint") or (partitions_types.get("c1") == "string") +def test_s3_empty_dfs(): + df = pd.DataFrame() + with pytest.raises(wr.exceptions.EmptyDataFrame): + wr.s3.to_parquet(df=df, path="") + with pytest.raises(wr.exceptions.EmptyDataFrame): + wr.s3.to_csv(df=df, path="") -@pytest.mark.parametrize("partition_cols", [None, ["c2"], ["c1", "c2"]]) -def test_store_metadata_partitions_dataset(database, table, path, partition_cols): - df = pd.DataFrame({"c0": [0, 1, 2], "c1": [3, 4, 5], "c2": [6, 7, 8]}) - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, partition_cols=partition_cols)["paths"] - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - wr.s3.store_parquet_metadata(path=path, database=database, table=table, dataset=True) - df2 = wr.athena.read_sql_table(table=table, database=database) - assert len(df.index) == len(df2.index) - assert len(df.columns) == len(df2.columns) - assert df.c0.sum() == df2.c0.sum() - assert df.c1.sum() == df2.c1.astype(int).sum() - assert df.c2.sum() == df2.c2.astype(int).sum() - - -def test_json_chunksize(path): - num_files = 10 - df = pd.DataFrame({"id": [1, 2, 3], "value": ["foo", "boo", "bar"]}) - paths = [f"{path}{i}.json" for i in range(num_files)] - for p in paths: - wr.s3.to_json(df, p, orient="records", lines=True) - wr.s3.wait_objects_exist(paths) - dfs = list(wr.s3.read_json(paths, lines=True, chunksize=1)) - assert len(dfs) == (3 * num_files) - for d in dfs: - assert len(d.columns) == 2 - assert d.id.iloc[0] in (1, 2, 3) - assert d.value.iloc[0] in ("foo", "boo", "bar") - - -def test_parquet_cast_string(path): - df = pd.DataFrame({"id": [1, 2, 3], "value": ["foo", "boo", "bar"]}) - path_file = f"{path}0.parquet" - wr.s3.to_parquet(df, path_file, dtype={"id": "string"}) - wr.s3.wait_objects_exist([path_file]) - df2 = wr.s3.read_parquet(path_file) - assert str(df2.id.dtypes) == "string" - df2["id"] = df2["id"].astype(int) - assert df.shape == df2.shape - for col, row in tuple(itertools.product(df.columns, range(3))): - assert df[col].iloc[row] == df2[col].iloc[row] +def test_absent_object(bucket): + path = f"s3://{bucket}/test_absent_object" + assert wr.s3.does_object_exist(path=path) is False + assert len(wr.s3.size_objects(path=path)) == 0 + assert wr.s3.wait_objects_exist(paths=[]) is None -@pytest.mark.parametrize("partition_cols", [None, ["c2"], ["value", "c2"]]) -def test_parquet_cast_string_dataset(path, partition_cols): - df = pd.DataFrame({"id": [1, 2, 3], "value": ["foo", "boo", "bar"], "c2": [4, 5, 6], "c3": [7.0, 8.0, 9.0]}) - paths = wr.s3.to_parquet( - df, path, dataset=True, partition_cols=partition_cols, dtype={"id": "string", "c3": "string"} - )["paths"] - wr.s3.wait_objects_exist(paths) - df2 = wr.s3.read_parquet(path, dataset=True).sort_values("id", ignore_index=True) - assert str(df2.id.dtypes) == "string" - assert str(df2.c3.dtypes) == "string" - df2["id"] = df2["id"].astype(int) - df2["c3"] = 
df2["c3"].astype(float) - assert df.shape == df2.shape - for col, row in tuple(itertools.product(df.columns, range(3))): - assert df[col].iloc[row] == df2[col].iloc[row] +def test_athena_struct(database): + sql = "SELECT CAST(ROW(1, 'foo') AS ROW(id BIGINT, value VARCHAR)) AS col0" + with pytest.raises(wr.exceptions.UnsupportedType): + wr.athena.read_sql_query(sql=sql, database=database, ctas_approach=False) + df = wr.athena.read_sql_query(sql=sql, database=database, ctas_approach=True) + assert len(df.index) == 1 + assert len(df.columns) == 1 + assert df["col0"].iloc[0]["id"] == 1 + assert df["col0"].iloc[0]["value"] == "foo" + sql = "SELECT ROW(1, ROW(2, ROW(3, '4'))) AS col0" + df = wr.athena.read_sql_query(sql=sql, database=database, ctas_approach=True) + assert len(df.index) == 1 + assert len(df.columns) == 1 + assert df["col0"].iloc[0]["field0"] == 1 + assert df["col0"].iloc[0]["field1"]["field0"] == 2 + assert df["col0"].iloc[0]["field1"]["field1"]["field0"] == 3 + assert df["col0"].iloc[0]["field1"]["field1"]["field1"] == "4" -@pytest.mark.parametrize("partition_cols", [None, ["c2"], ["c1", "c2"]]) -def test_store_metadata_partitions_sample_dataset(database, table, path, partition_cols): - num_files = 10 - df = pd.DataFrame({"c0": [0, 1, 2], "c1": [3, 4, 5], "c2": [6, 7, 8]}) - for _ in range(num_files): - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, partition_cols=partition_cols)["paths"] - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - wr.s3.store_parquet_metadata( - path=path, database=database, table=table, dtype={"c1": "bigint", "c2": "smallint"}, sampling=0.25, dataset=True - ) - df2 = wr.athena.read_sql_table(table=table, database=database) - assert len(df.index) * num_files == len(df2.index) - assert len(df.columns) == len(df2.columns) - assert df.c0.sum() * num_files == df2.c0.sum() - assert df.c1.sum() * num_files == df2.c1.sum() - assert df.c2.sum() * num_files == df2.c2.sum() +def test_athena_time_zone(database): + sql = "SELECT current_timestamp AS value, typeof(current_timestamp) AS type" + df = wr.athena.read_sql_query(sql=sql, database=database, ctas_approach=False) + assert len(df.index) == 1 + assert len(df.columns) == 2 + assert df["type"][0] == "timestamp with time zone" + assert df["value"][0].year == datetime.datetime.utcnow().year -def test_athena_undefined_column(database): - with pytest.raises(wr.exceptions.InvalidArgumentValue): - wr.athena.read_sql_query("SELECT 1", database) - with pytest.raises(wr.exceptions.InvalidArgumentValue): - wr.athena.read_sql_query("SELECT NULL AS my_null", database) +def test_category(bucket, database): + df = get_df_category() + path = f"s3://{bucket}/test_category/" + paths = wr.s3.to_parquet( + df=df, + path=path, + dataset=True, + database=database, + table="test_category", + mode="overwrite", + partition_cols=["par0", "par1"], + )["paths"] + wr.s3.wait_objects_exist(paths=paths, use_threads=False) + df2 = wr.s3.read_parquet(path=path, dataset=True, categories=[c for c in df.columns if c not in ["par0", "par1"]]) + ensure_data_types_category(df2) + df2 = wr.athena.read_sql_query("SELECT * FROM test_category", database=database, categories=list(df.columns)) + ensure_data_types_category(df2) + df2 = wr.athena.read_sql_table(table="test_category", database=database, categories=list(df.columns)) + ensure_data_types_category(df2) + df2 = wr.athena.read_sql_query( + "SELECT * FROM test_category", database=database, categories=list(df.columns), ctas_approach=False + ) + ensure_data_types_category(df2) + 
dfs = wr.athena.read_sql_query( + "SELECT * FROM test_category", database=database, categories=list(df.columns), ctas_approach=False, chunksize=1 + ) + for df2 in dfs: + ensure_data_types_category(df2) + dfs = wr.athena.read_sql_query( + "SELECT * FROM test_category", database=database, categories=list(df.columns), ctas_approach=True, chunksize=1 + ) + for df2 in dfs: + ensure_data_types_category(df2) + wr.s3.delete_objects(path=paths) + assert wr.catalog.delete_table_if_exists(database=database, table="test_category") is True -def test_to_parquet_file_sanitize(path): - df = pd.DataFrame({"C0": [0, 1], "camelCase": [2, 3], "c**--2": [4, 5]}) +def test_parquet_validate_schema(path): + df = pd.DataFrame({"id": [1, 2, 3]}) path_file = f"{path}0.parquet" - wr.s3.to_parquet(df, path_file) - wr.s3.wait_objects_exist([path_file]) - df2 = wr.s3.read_parquet(path_file) - assert df.shape == df2.shape - assert list(df2.columns) == ["c0", "camel_case", "c_2"] - assert df2.c0.sum() == 1 - assert df2.camel_case.sum() == 5 - assert df2.c_2.sum() == 9 - + wr.s3.to_parquet(df=df, path=path_file) + wr.s3.wait_objects_exist(paths=[path_file]) + df2 = pd.DataFrame({"id2": [1, 2, 3], "val": ["foo", "boo", "bar"]}) + path_file2 = f"{path}1.parquet" + wr.s3.to_parquet(df=df2, path=path_file2) + wr.s3.wait_objects_exist(paths=[path_file2], use_threads=False) + df3 = wr.s3.read_parquet(path=path, validate_schema=False) + assert len(df3.index) == 6 + assert len(df3.columns) == 3 + with pytest.raises(ValueError): + wr.s3.read_parquet(path=path, validate_schema=True) -def test_to_parquet_modes(database, table, path, external_schema): - # Round 1 - Warm up - df = pd.DataFrame({"c0": [0, None]}, dtype="Int64") - paths = wr.s3.to_parquet( +def test_csv_dataset(bucket, database): + path = f"s3://{bucket}/test_csv_dataset/" + with pytest.raises(wr.exceptions.UndetectedType): + wr.s3.to_csv(pd.DataFrame({"A": [None]}), path, dataset=True, database=database, table="test_csv_dataset") + df = get_df_csv() + with pytest.raises(wr.exceptions.InvalidArgumentCombination): + wr.s3.to_csv(df, path, dataset=False, mode="overwrite", database=database, table="test_csv_dataset") + with pytest.raises(wr.exceptions.InvalidArgumentCombination): + wr.s3.to_csv(df, path, dataset=False, table="test_csv_dataset") + with pytest.raises(wr.exceptions.InvalidArgumentCombination): + wr.s3.to_csv(df, path, dataset=True, mode="overwrite", database=database) + with pytest.raises(wr.exceptions.InvalidArgumentCombination): + wr.s3.to_csv(df=df, path=path, mode="append") + with pytest.raises(wr.exceptions.InvalidArgumentCombination): + wr.s3.to_csv(df=df, path=path, partition_cols=["col2"]) + with pytest.raises(wr.exceptions.InvalidArgumentCombination): + wr.s3.to_csv(df=df, path=path, description="foo") + with pytest.raises(wr.exceptions.InvalidArgumentValue): + wr.s3.to_csv(df=df, path=path, partition_cols=["col2"], dataset=True, mode="WRONG") + paths = wr.s3.to_csv( df=df, path=path, + sep="|", + index=False, + use_threads=True, + boto3_session=None, + s3_additional_kwargs=None, dataset=True, + partition_cols=["par0", "par1"], mode="overwrite", - database=database, - table=table, - description="c0", - parameters={"num_cols": str(len(df.columns)), "num_rows": str(len(df.index))}, - columns_comments={"c0": "0"}, )["paths"] wr.s3.wait_objects_exist(paths=paths) - df2 = wr.athena.read_sql_table(table, database) - assert df.shape == df2.shape - assert df.c0.sum() == df2.c0.sum() - parameters = wr.catalog.get_table_parameters(database, table) - assert 
len(parameters) >= 5 - assert parameters["num_cols"] == str(len(df2.columns)) - assert parameters["num_rows"] == str(len(df2.index)) - assert wr.catalog.get_table_description(database, table) == "c0" - comments = wr.catalog.get_columns_comments(database, table) - assert len(comments) == len(df.columns) - assert comments["c0"] == "0" + df2 = wr.s3.read_csv(path=paths, sep="|", header=None) + assert len(df2.index) == 3 + assert len(df2.columns) == 8 + assert df2[0].sum() == 6 + wr.s3.delete_objects(path=paths) - # Round 2 - Overwrite - df = pd.DataFrame({"c1": [None, 1, None]}, dtype="Int16") - paths = wr.s3.to_parquet( + +def test_csv_catalog(bucket, database): + path = f"s3://{bucket}/test_csv_catalog/" + df = get_df_csv() + paths = wr.s3.to_csv( df=df, path=path, + sep="\t", + index=True, + use_threads=True, + boto3_session=None, + s3_additional_kwargs=None, dataset=True, + partition_cols=["par0", "par1"], mode="overwrite", + table="test_csv_catalog", database=database, - table=table, - description="c1", - parameters={"num_cols": str(len(df.columns)), "num_rows": str(len(df.index))}, - columns_comments={"c1": "1"}, )["paths"] wr.s3.wait_objects_exist(paths=paths) - df2 = wr.athena.read_sql_table(table, database) - assert df.shape == df2.shape - assert df.c1.sum() == df2.c1.sum() - parameters = wr.catalog.get_table_parameters(database, table) - assert len(parameters) >= 5 - assert parameters["num_cols"] == str(len(df2.columns)) - assert parameters["num_rows"] == str(len(df2.index)) - assert wr.catalog.get_table_description(database, table) == "c1" - comments = wr.catalog.get_columns_comments(database, table) - assert len(comments) == len(df.columns) - assert comments["c1"] == "1" + df2 = wr.athena.read_sql_table("test_csv_catalog", database) + assert len(df2.index) == 3 + assert len(df2.columns) == 11 + assert df2["id"].sum() == 6 + ensure_data_types_csv(df2) + wr.s3.delete_objects(path=paths) + assert wr.catalog.delete_table_if_exists(database=database, table="test_csv_catalog") is True - # Round 3 - Append - df = pd.DataFrame({"c1": [None, 2, None]}, dtype="Int8") - paths = wr.s3.to_parquet( - df=df, + +def test_csv_catalog_columns(bucket, database): + path = f"s3://{bucket}/test_csv_catalog_columns /" + paths = wr.s3.to_csv( + df=get_df_csv(), path=path, + sep="|", + columns=["id", "date", "timestamp", "par0", "par1"], + index=False, + use_threads=False, + boto3_session=None, + s3_additional_kwargs=None, dataset=True, - mode="append", + partition_cols=["par0", "par1"], + mode="overwrite", + table="test_csv_catalog_columns", database=database, - table=table, - description="c1", - parameters={"num_cols": str(len(df.columns)), "num_rows": str(len(df.index) * 2)}, - columns_comments={"c1": "1"}, )["paths"] wr.s3.wait_objects_exist(paths=paths) - df2 = wr.athena.read_sql_table(table, database) - assert len(df.columns) == len(df2.columns) - assert len(df.index) * 2 == len(df2.index) - assert df.c1.sum() + 1 == df2.c1.sum() - parameters = wr.catalog.get_table_parameters(database, table) - assert len(parameters) >= 5 - assert parameters["num_cols"] == str(len(df2.columns)) - assert parameters["num_rows"] == str(len(df2.index)) - assert wr.catalog.get_table_description(database, table) == "c1" - comments = wr.catalog.get_columns_comments(database, table) - assert len(comments) == len(df.columns) - assert comments["c1"] == "1" + df2 = wr.athena.read_sql_table("test_csv_catalog_columns", database) + assert len(df2.index) == 3 + assert len(df2.columns) == 5 + assert df2["id"].sum() == 6 + 
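# Validate that Athena returns the selected CSV columns with the expected dtypes.
+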
ensure_data_types_csv(df2) - # Round 4 - Append + New Column - df = pd.DataFrame({"c2": ["a", None, "b"], "c1": [None, None, None]}) - paths = wr.s3.to_parquet( - df=df, + paths = wr.s3.to_csv( + df=pd.DataFrame({"id": [4], "date": [None], "timestamp": [None], "par0": [1], "par1": ["a"]}), path=path, + sep="|", + index=False, + use_threads=False, + boto3_session=None, + s3_additional_kwargs=None, dataset=True, - mode="append", + partition_cols=["par0", "par1"], + mode="overwrite_partitions", + table="test_csv_catalog_columns", database=database, - table=table, - description="c1+c2", - parameters={"num_cols": "2", "num_rows": "9"}, - columns_comments={"c1": "1", "c2": "2"}, )["paths"] wr.s3.wait_objects_exist(paths=paths) - df2 = wr.athena.read_sql_table(table, database) - assert len(df2.columns) == 2 - assert len(df2.index) == 9 - assert df2.c1.sum() == 3 - parameters = wr.catalog.get_table_parameters(database, table) - assert len(parameters) >= 5 - assert parameters["num_cols"] == "2" - assert parameters["num_rows"] == "9" - assert wr.catalog.get_table_description(database, table) == "c1+c2" - comments = wr.catalog.get_columns_comments(database, table) - assert len(comments) == len(df.columns) - assert comments["c1"] == "1" - assert comments["c2"] == "2" + df2 = wr.athena.read_sql_table("test_csv_catalog_columns", database) + assert len(df2.index) == 3 + assert len(df2.columns) == 5 + assert df2["id"].sum() == 9 + ensure_data_types_csv(df2) - # Round 5 - Append + New Column + Wrong Types - df = pd.DataFrame({"c2": [1], "c3": [True], "c1": ["1"]}) - paths = wr.s3.to_parquet( + wr.s3.delete_objects(path=path) + assert wr.catalog.delete_table_if_exists(database=database, table="test_csv_catalog_columns") is True + + +def test_athena_types(bucket, database): + path = f"s3://{bucket}/test_athena_types/" + df = get_df_csv() + paths = wr.s3.to_csv( df=df, path=path, + sep=",", + index=False, + use_threads=True, + boto3_session=None, + s3_additional_kwargs=None, dataset=True, - mode="append", - database=database, - table=table, - description="c1+c2+c3", - parameters={"num_cols": "3", "num_rows": "10"}, - columns_comments={"c1": "1!", "c2": "2!", "c3": "3"}, + partition_cols=["par0", "par1"], + mode="overwrite", )["paths"] wr.s3.wait_objects_exist(paths=paths) - df2 = wr.athena.read_sql_table(table, database) - assert len(df2.columns) == 3 - assert len(df2.index) == 10 - assert df2.c1.sum() == 4 - parameters = wr.catalog.get_table_parameters(database, table) - assert len(parameters) >= 5 - assert parameters["num_cols"] == "3" - assert parameters["num_rows"] == "10" - assert wr.catalog.get_table_description(database, table) == "c1+c2+c3" - comments = wr.catalog.get_columns_comments(database, table) - assert len(comments) == len(df.columns) - assert comments["c1"] == "1!" - assert comments["c2"] == "2!" 
- assert comments["c3"] == "3" - engine = wr.catalog.get_engine("aws-data-wrangler-redshift") - df3 = wr.db.read_sql_table(con=engine, table=table, schema=external_schema) - assert len(df3.columns) == 3 - assert len(df3.index) == 10 - assert df3.c1.sum() == 4 + columns_types, partitions_types = wr.catalog.extract_athena_types( + df=df, index=False, partition_cols=["par0", "par1"], file_format="csv" + ) + wr.catalog.create_csv_table( + table="test_athena_types", + database=database, + path=path, + partitions_types=partitions_types, + columns_types=columns_types, + ) + wr.catalog.create_csv_table( + database=database, table="test_athena_types", path=path, columns_types={"col0": "string"}, mode="append" + ) + wr.athena.repair_table("test_athena_types", database) + assert len(wr.catalog.get_csv_partitions(database, "test_athena_types")) == 3 + df2 = wr.athena.read_sql_table("test_athena_types", database) + assert len(df2.index) == 3 + assert len(df2.columns) == 10 + assert df2["id"].sum() == 6 + ensure_data_types_csv(df2) + wr.s3.delete_objects(path=paths) + assert wr.catalog.delete_table_if_exists(database=database, table="test_athena_types") is True - # Round 6 - Overwrite Partitioned - df = pd.DataFrame({"c0": ["foo", None], "c1": [0, 1]}) + +def test_parquet_catalog_columns(bucket, database): + path = f"s3://{bucket}/test_parquet_catalog_columns/" paths = wr.s3.to_parquet( - df=df, + df=get_df_csv()[["id", "date", "timestamp", "par0", "par1"]], path=path, + index=False, + use_threads=False, + boto3_session=None, + s3_additional_kwargs=None, dataset=True, + partition_cols=["par0", "par1"], mode="overwrite", + table="test_parquet_catalog_columns", database=database, - table=table, - partition_cols=["c1"], - description="c0+c1", - parameters={"num_cols": "2", "num_rows": "2"}, - columns_comments={"c0": "zero", "c1": "one"}, )["paths"] wr.s3.wait_objects_exist(paths=paths) - df2 = wr.athena.read_sql_table(table, database) - assert df.shape == df2.shape - assert df.c1.sum() == df2.c1.sum() - parameters = wr.catalog.get_table_parameters(database, table) - assert len(parameters) >= 5 - assert parameters["num_cols"] == "2" - assert parameters["num_rows"] == "2" - assert wr.catalog.get_table_description(database, table) == "c0+c1" - comments = wr.catalog.get_columns_comments(database, table) - assert len(comments) == len(df.columns) - assert comments["c0"] == "zero" - assert comments["c1"] == "one" + df2 = wr.athena.read_sql_table("test_parquet_catalog_columns", database) + assert len(df2.index) == 3 + assert len(df2.columns) == 5 + assert df2["id"].sum() == 6 + ensure_data_types_csv(df2) - # Round 7 - Overwrite Partitions - df = pd.DataFrame({"c0": [None, None], "c1": [0, 2]}) paths = wr.s3.to_parquet( - df=df, + df=pd.DataFrame({"id": [4], "date": [None], "timestamp": [None], "par0": [1], "par1": ["a"]}), path=path, + index=False, + use_threads=False, + boto3_session=None, + s3_additional_kwargs=None, dataset=True, + partition_cols=["par0", "par1"], mode="overwrite_partitions", + table="test_parquet_catalog_columns", database=database, - table=table, - partition_cols=["c1"], - description="c0+c1", - parameters={"num_cols": "2", "num_rows": "3"}, - columns_comments={"c0": "zero", "c1": "one"}, )["paths"] wr.s3.wait_objects_exist(paths=paths) - df2 = wr.athena.read_sql_table(table, database) - assert len(df2.columns) == 2 + df2 = wr.athena.read_sql_table("test_parquet_catalog_columns", database) assert len(df2.index) == 3 - assert df2.c1.sum() == 3 - parameters = 
wr.catalog.get_table_parameters(database, table) - assert len(parameters) >= 5 - assert parameters["num_cols"] == "2" - assert parameters["num_rows"] == "3" - assert wr.catalog.get_table_description(database, table) == "c0+c1" - comments = wr.catalog.get_columns_comments(database, table) - assert len(comments) == len(df.columns) - assert comments["c0"] == "zero" - assert comments["c1"] == "one" + assert len(df2.columns) == 5 + assert df2["id"].sum() == 9 + ensure_data_types_csv(df2) - # Round 8 - Overwrite Partitions + New Column + Wrong Type - df = pd.DataFrame({"c0": [1, 2], "c1": ["1", "3"], "c2": [True, False]}) + wr.s3.delete_objects(path=path) + assert wr.catalog.delete_table_if_exists(database=database, table="test_parquet_catalog_columns") is True + + +@pytest.mark.parametrize("compression", [None, "gzip", "snappy"]) +def test_parquet_compress(bucket, database, compression): + path = f"s3://{bucket}/test_parquet_compress_{compression}/" paths = wr.s3.to_parquet( - df=df, + df=get_df(), path=path, + compression=compression, dataset=True, - mode="overwrite_partitions", database=database, - table=table, - partition_cols=["c1"], - description="c0+c1+c2", - parameters={"num_cols": "3", "num_rows": "4"}, - columns_comments={"c0": "zero", "c1": "one", "c2": "two"}, + table=f"test_parquet_compress_{compression}", + mode="overwrite", )["paths"] wr.s3.wait_objects_exist(paths=paths) - df2 = wr.athena.read_sql_table(table, database) + df2 = wr.athena.read_sql_table(f"test_parquet_compress_{compression}", database) + ensure_data_types(df2) + df2 = wr.s3.read_parquet(path=path) + wr.s3.delete_objects(path=path) + assert wr.catalog.delete_table_if_exists(database=database, table=f"test_parquet_compress_{compression}") is True + ensure_data_types(df2) + + +@pytest.mark.parametrize("compression", ["gzip", "bz2", "xz"]) +def test_csv_compress(bucket, compression): + path = f"s3://{bucket}/test_csv_compress_{compression}/" + wr.s3.delete_objects(path=path) + df = get_df_csv() + if compression == "gzip": + buffer = BytesIO() + with gzip.GzipFile(mode="w", fileobj=buffer) as zipped_file: + df.to_csv(TextIOWrapper(zipped_file, "utf8"), index=False, header=None) + s3_resource = boto3.resource("s3") + s3_object = s3_resource.Object(bucket, f"test_csv_compress_{compression}/test.csv.gz") + s3_object.put(Body=buffer.getvalue()) + file_path = f"s3://{bucket}/test_csv_compress_{compression}/test.csv.gz" + elif compression == "bz2": + buffer = BytesIO() + with bz2.BZ2File(mode="w", filename=buffer) as zipped_file: + df.to_csv(TextIOWrapper(zipped_file, "utf8"), index=False, header=None) + s3_resource = boto3.resource("s3") + s3_object = s3_resource.Object(bucket, f"test_csv_compress_{compression}/test.csv.bz2") + s3_object.put(Body=buffer.getvalue()) + file_path = f"s3://{bucket}/test_csv_compress_{compression}/test.csv.bz2" + elif compression == "xz": + buffer = BytesIO() + with lzma.LZMAFile(mode="w", filename=buffer) as zipped_file: + df.to_csv(TextIOWrapper(zipped_file, "utf8"), index=False, header=None) + s3_resource = boto3.resource("s3") + s3_object = s3_resource.Object(bucket, f"test_csv_compress_{compression}/test.csv.xz") + s3_object.put(Body=buffer.getvalue()) + file_path = f"s3://{bucket}/test_csv_compress_{compression}/test.csv.xz" + else: + file_path = f"s3://{bucket}/test_csv_compress_{compression}/test.csv" + wr.s3.to_csv(df=df, path=file_path, index=False, header=None) + + wr.s3.wait_objects_exist(paths=[file_path]) + df2 = wr.s3.read_csv(path=[file_path], names=df.columns) + assert 
len(df2.index) == 3 + assert len(df2.columns) == 10 + dfs = wr.s3.read_csv(path=[file_path], names=df.columns, chunksize=1) + for df3 in dfs: + assert len(df3.columns) == 10 + wr.s3.delete_objects(path=path) + + +def test_parquet_char_length(path, database, table, external_schema): + df = pd.DataFrame( + {"id": [1, 2], "cchar": ["foo", "boo"], "date": [datetime.date(2020, 1, 1), datetime.date(2020, 1, 2)]} + ) + wr.s3.to_parquet( + df=df, + path=path, + dataset=True, + database=database, + table=table, + mode="overwrite", + partition_cols=["date"], + dtype={"cchar": "char(3)"}, + ) + + df2 = wr.s3.read_parquet(path, dataset=True) + assert len(df2.index) == 2 assert len(df2.columns) == 3 - assert len(df2.index) == 4 - assert df2.c1.sum() == 6 - parameters = wr.catalog.get_table_parameters(database, table) - assert len(parameters) >= 5 - assert parameters["num_cols"] == "3" - assert parameters["num_rows"] == "4" - assert wr.catalog.get_table_description(database, table) == "c0+c1+c2" - comments = wr.catalog.get_columns_comments(database, table) - assert len(comments) == len(df.columns) - assert comments["c0"] == "zero" - assert comments["c1"] == "one" - assert comments["c2"] == "two" + assert df2.id.sum() == 3 + + df2 = wr.athena.read_sql_table(table=table, database=database) + assert len(df2.index) == 2 + assert len(df2.columns) == 3 + assert df2.id.sum() == 3 + engine = wr.catalog.get_engine("aws-data-wrangler-redshift") - df3 = wr.db.read_sql_table(con=engine, table=table, schema=external_schema) - assert len(df3.columns) == 3 - assert len(df3.index) == 4 - assert df3.c1.sum() == 6 + df2 = wr.db.read_sql_table(con=engine, table=table, schema=external_schema) + assert len(df2.index) == 2 + assert len(df2.columns) == 3 + assert df2.id.sum() == 3 + + +def test_merge(bucket): + path = f"s3://{bucket}/test_merge/" + df = pd.DataFrame({"id": [1, 2, 3], "par": [1, 2, 3]}) + paths = wr.s3.to_parquet(df=df, path=path, dataset=True, partition_cols=["par"], mode="overwrite")["paths"] + wr.s3.wait_objects_exist(paths=paths) + df = wr.s3.read_parquet(path=path, dataset=True) + assert df.id.sum() == 6 + assert df.par.astype("Int64").sum() == 6 + + path2 = f"s3://{bucket}/test_merge2/" + df = pd.DataFrame({"id": [1, 2, 3], "par": [1, 2, 3]}) + paths = wr.s3.to_parquet(df=df, path=path2, dataset=True, partition_cols=["par"], mode="overwrite")["paths"] + wr.s3.wait_objects_exist(paths=paths) + paths = wr.s3.merge_datasets(source_path=path2, target_path=path, mode="append", use_threads=True) + wr.s3.wait_objects_exist(paths=paths, use_threads=False) + df = wr.s3.read_parquet(path=path, dataset=True) + assert df.id.sum() == 12 + assert df.par.astype("Int64").sum() == 12 + + paths = wr.s3.merge_datasets(source_path=path2, target_path=path, mode="overwrite", use_threads=False) + wr.s3.wait_objects_exist(paths=paths, use_threads=False) + df = wr.s3.read_parquet(path=path, dataset=True) + assert df.id.sum() == 6 + assert df.par.astype("Int64").sum() == 6 + + df = pd.DataFrame({"id": [4], "par": [3]}) + paths = wr.s3.to_parquet(df=df, path=path2, dataset=True, partition_cols=["par"], mode="overwrite")["paths"] + wr.s3.wait_objects_exist(paths=paths) + paths = wr.s3.merge_datasets(source_path=path2, target_path=path, mode="overwrite_partitions", use_threads=True) + wr.s3.wait_objects_exist(paths=paths, use_threads=False) + df = wr.s3.read_parquet(path=path, dataset=True) + assert df.id.sum() == 7 + assert df.par.astype("Int64").sum() == 6 + + with pytest.raises(wr.exceptions.InvalidArgumentValue): + 
wr.s3.merge_datasets(source_path=path, target_path="bar", mode="WRONG") + + assert len(wr.s3.merge_datasets(source_path=f"s3://{bucket}/empty/", target_path="bar")) == 0 + + wr.s3.delete_objects(path=path) + wr.s3.delete_objects(path=path2) + + +def test_copy(bucket): + path = f"s3://{bucket}/test_copy/" + df = pd.DataFrame({"id": [1, 2, 3], "par": [1, 2, 3]}) + paths = wr.s3.to_parquet(df=df, path=path, dataset=True, partition_cols=["par"], mode="overwrite")["paths"] + wr.s3.wait_objects_exist(paths=paths) + df = wr.s3.read_parquet(path=path, dataset=True) + assert df.id.sum() == 6 + assert df.par.astype("Int64").sum() == 6 + + path2 = f"s3://{bucket}/test_copy2/" + df = pd.DataFrame({"id": [1, 2, 3], "par": [1, 2, 3]}) + paths = wr.s3.to_parquet(df=df, path=path2, dataset=True, partition_cols=["par"], mode="overwrite")["paths"] + wr.s3.wait_objects_exist(paths=paths) + paths = wr.s3.copy_objects(paths, source_path=path2, target_path=path, use_threads=True) + wr.s3.wait_objects_exist(paths=paths, use_threads=False) + df = wr.s3.read_parquet(path=path, dataset=True) + assert df.id.sum() == 12 + assert df.par.astype("Int64").sum() == 12 + + assert len(wr.s3.copy_objects([], source_path="boo", target_path="bar")) == 0 + + wr.s3.delete_objects(path=path) + wr.s3.delete_objects(path=path2) + + +@pytest.mark.parametrize("col2", [[1, 1, 1, 1, 1], [1, 2, 3, 4, 5], [1, 1, 1, 1, 2], [1, 2, 2, 2, 2]]) +@pytest.mark.parametrize("chunked", [True, 1, 2, 100]) +def test_parquet_chunked(bucket, database, col2, chunked): + table = f"test_parquet_chunked_{chunked}_{''.join([str(x) for x in col2])}" + path = f"s3://{bucket}/{table}/" + wr.s3.delete_objects(path=path) + values = list(range(5)) + df = pd.DataFrame({"col1": values, "col2": col2}) + paths = wr.s3.to_parquet( + df, path, index=False, dataset=True, database=database, table=table, partition_cols=["col2"], mode="overwrite" + )["paths"] + wr.s3.wait_objects_exist(paths=paths) + dfs = list(wr.s3.read_parquet(path=path, dataset=True, chunked=chunked)) + assert sum(values) == pd.concat(dfs, ignore_index=True).col1.sum() + if chunked is not True: + assert len(dfs) == int(math.ceil(len(df) / chunked)) + for df2 in dfs[:-1]: + assert chunked == len(df2) + assert chunked >= len(dfs[-1]) + else: + assert len(dfs) == len(set(col2)) -def test_store_parquet_metadata_modes(database, table, path, external_schema): + dfs = list(wr.athena.read_sql_table(database=database, table=table, chunksize=chunked)) + assert sum(values) == pd.concat(dfs, ignore_index=True).col1.sum() + if chunked is not True: + assert len(dfs) == int(math.ceil(len(df) / chunked)) + for df2 in dfs[:-1]: + assert chunked == len(df2) + assert chunked >= len(dfs[-1]) - # Round 1 - Warm up - df = pd.DataFrame({"c0": [0, None]}, dtype="Int64") - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, mode="overwrite")["paths"] - wr.s3.wait_objects_exist(paths=paths) - wr.s3.store_parquet_metadata( - path=path, - dataset=True, - mode="overwrite", - database=database, - table=table, - description="c0", - parameters={"num_cols": str(len(df.columns)), "num_rows": str(len(df.index))}, - columns_comments={"c0": "0"}, - ) - df2 = wr.athena.read_sql_table(table, database) - assert df.shape == df2.shape - assert df.c0.sum() == df2.c0.sum() - parameters = wr.catalog.get_table_parameters(database, table) - assert len(parameters) >= 5 - assert parameters["num_cols"] == str(len(df2.columns)) - assert parameters["num_rows"] == str(len(df2.index)) - assert wr.catalog.get_table_description(database, table) == 
"c0" - comments = wr.catalog.get_columns_comments(database, table) - assert len(comments) == len(df.columns) - assert comments["c0"] == "0" + wr.s3.delete_objects(path=paths) + assert wr.catalog.delete_table_if_exists(database=database, table=table) is True - # Round 2 - Overwrite - df = pd.DataFrame({"c1": [None, 1, None]}, dtype="Int16") - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, mode="overwrite")["paths"] - wr.s3.wait_objects_exist(paths=paths) - wr.s3.store_parquet_metadata( - path=path, - dataset=True, - mode="overwrite", - database=database, - table=table, - description="c1", - parameters={"num_cols": str(len(df.columns)), "num_rows": str(len(df.index))}, - columns_comments={"c1": "1"}, - ) - df2 = wr.athena.read_sql_table(table, database) - assert df.shape == df2.shape - assert df.c1.sum() == df2.c1.sum() - parameters = wr.catalog.get_table_parameters(database, table) - assert len(parameters) >= 5 - assert parameters["num_cols"] == str(len(df2.columns)) - assert parameters["num_rows"] == str(len(df2.index)) - assert wr.catalog.get_table_description(database, table) == "c1" - comments = wr.catalog.get_columns_comments(database, table) - assert len(comments) == len(df.columns) - assert comments["c1"] == "1" - # Round 3 - Append - df = pd.DataFrame({"c1": [None, 2, None]}, dtype="Int16") - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, mode="append")["paths"] - wr.s3.wait_objects_exist(paths=paths) - wr.s3.store_parquet_metadata( - path=path, - dataset=True, - mode="append", - database=database, +@pytest.mark.parametrize("workgroup", [None, 0, 1, 2, 3]) +@pytest.mark.parametrize("encryption", [None, "SSE_S3", "SSE_KMS"]) +# @pytest.mark.parametrize("workgroup", [3]) +# @pytest.mark.parametrize("encryption", [None]) +def test_athena_encryption( + path, path2, database, table, table2, kms_key, encryption, workgroup, workgroup0, workgroup1, workgroup2, workgroup3 +): + kms_key = None if (encryption == "SSE_S3") or (encryption is None) else kms_key + if workgroup == 0: + workgroup = workgroup0 + elif workgroup == 1: + workgroup = workgroup1 + elif workgroup == 2: + workgroup = workgroup2 + elif workgroup == 3: + workgroup = workgroup3 + df = pd.DataFrame({"a": [1, 2], "b": ["foo", "boo"]}) + paths = wr.s3.to_parquet( + df=df, path=path, dataset=True, mode="overwrite", database=database, table=table, s3_additional_kwargs=None + )["paths"] + wr.s3.wait_objects_exist(paths=paths, use_threads=False) + df2 = wr.athena.read_sql_table( table=table, - description="c1", - parameters={"num_cols": str(len(df.columns)), "num_rows": str(len(df.index) * 2)}, - columns_comments={"c1": "1"}, - ) - df2 = wr.athena.read_sql_table(table, database) - assert len(df.columns) == len(df2.columns) - assert len(df.index) * 2 == len(df2.index) - assert df.c1.sum() + 1 == df2.c1.sum() - parameters = wr.catalog.get_table_parameters(database, table) - assert len(parameters) >= 5 - assert parameters["num_cols"] == str(len(df2.columns)) - assert parameters["num_rows"] == str(len(df2.index)) - assert wr.catalog.get_table_description(database, table) == "c1" - comments = wr.catalog.get_columns_comments(database, table) - assert len(comments) == len(df.columns) - assert comments["c1"] == "1" - - # Round 4 - Append + New Column - df = pd.DataFrame({"c2": ["a", None, "b"], "c1": [None, 1, None]}) - df["c1"] = df["c1"].astype("Int16") - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, mode="append")["paths"] - wr.s3.wait_objects_exist(paths=paths) - wr.s3.store_parquet_metadata( - path=path, - 
dataset=True, - mode="append", + ctas_approach=True, database=database, - table=table, - description="c1+c2", - parameters={"num_cols": "2", "num_rows": "9"}, - columns_comments={"c1": "1", "c2": "2"}, + encryption=encryption, + workgroup=workgroup, + kms_key=kms_key, + keep_files=True, + ctas_temp_table_name=table2, + s3_output=path2, ) - df2 = wr.athena.read_sql_table(table, database) + assert wr.catalog.does_table_exist(database=database, table=table2) is False + assert len(df2.index) == 2 assert len(df2.columns) == 2 - assert len(df2.index) == 9 - assert df2.c1.sum() == 4 - parameters = wr.catalog.get_table_parameters(database, table) - assert len(parameters) >= 5 - assert parameters["num_cols"] == "2" - assert parameters["num_rows"] == "9" - assert wr.catalog.get_table_description(database, table) == "c1+c2" - comments = wr.catalog.get_columns_comments(database, table) - assert len(comments) == len(df.columns) - assert comments["c1"] == "1" - assert comments["c2"] == "2" - # Round 5 - Overwrite Partitioned - df = pd.DataFrame({"c0": ["foo", None], "c1": [0, 1]}) - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, mode="overwrite", partition_cols=["c1"])["paths"] - wr.s3.wait_objects_exist(paths=paths) - wr.s3.store_parquet_metadata( - path=path, - dataset=True, - mode="overwrite", - database=database, - table=table, - description="c0+c1", - parameters={"num_cols": "2", "num_rows": "2"}, - columns_comments={"c0": "zero", "c1": "one"}, - ) - df2 = wr.athena.read_sql_table(table, database) - assert df.shape == df2.shape - assert df.c1.sum() == df2.c1.astype(int).sum() - parameters = wr.catalog.get_table_parameters(database, table) - assert len(parameters) >= 5 - assert parameters["num_cols"] == "2" - assert parameters["num_rows"] == "2" - assert wr.catalog.get_table_description(database, table) == "c0+c1" - comments = wr.catalog.get_columns_comments(database, table) - assert len(comments) == len(df.columns) - assert comments["c0"] == "zero" - assert comments["c1"] == "one" - # Round 6 - Overwrite Partitions - df = pd.DataFrame({"c0": [None, "boo"], "c1": [0, 2]}) - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, mode="overwrite_partitions", partition_cols=["c1"])[ - "paths" - ] +def test_athena_nested(path, database, table): + df = pd.DataFrame( + { + "c0": [[1, 2, 3], [4, 5, 6]], + "c1": [[[1, 2], [3, 4]], [[5, 6], [7, 8]]], + "c2": [[["a", "b"], ["c", "d"]], [["e", "f"], ["g", "h"]]], + "c3": [[], [[[[[[[[1]]]]]]]]], + "c4": [{"a": 1}, {"a": 1}], + "c5": [{"a": {"b": {"c": [1, 2]}}}, {"a": {"b": {"c": [3, 4]}}}], + } + ) + paths = wr.s3.to_parquet( + df=df, path=path, index=False, use_threads=True, dataset=True, mode="overwrite", database=database, table=table + )["paths"] wr.s3.wait_objects_exist(paths=paths) - wr.s3.store_parquet_metadata( + df2 = wr.athena.read_sql_query(sql=f"SELECT c0, c1, c2, c4 FROM {table}", database=database) + assert len(df2.index) == 2 + assert len(df2.columns) == 4 + + +def test_catalog_versioning(bucket, database): + table = "test_catalog_versioning" + wr.catalog.delete_table_if_exists(database=database, table=table) + path = f"s3://{bucket}/{table}/" + wr.s3.delete_objects(path=path) + + # Version 0 + df = pd.DataFrame({"c0": [1, 2]}) + paths = wr.s3.to_parquet(df=df, path=path, dataset=True, database=database, table=table, mode="overwrite")["paths"] + wr.s3.wait_objects_exist(paths=paths, use_threads=False) + df = wr.athena.read_sql_table(table=table, database=database) + assert len(df.index) == 2 + assert len(df.columns) == 1 + assert 
str(df.c0.dtype).startswith("Int") + + # Version 1 + df = pd.DataFrame({"c1": ["foo", "boo"]}) + paths1 = wr.s3.to_parquet( + df=df, path=path, dataset=True, database=database, table=table, mode="overwrite", catalog_versioning=True + )["paths"] + wr.s3.wait_objects_exist(paths=paths1, use_threads=False) + df = wr.athena.read_sql_table(table=table, database=database) + assert len(df.index) == 2 + assert len(df.columns) == 1 + assert str(df.c1.dtype) == "string" + + # Version 2 + df = pd.DataFrame({"c1": [1.0, 2.0]}) + paths2 = wr.s3.to_csv( + df=df, path=path, dataset=True, - mode="append", database=database, table=table, - description="c0+c1", - parameters={"num_cols": "2", "num_rows": "3"}, - columns_comments={"c0": "zero", "c1": "one"}, - ) - df2 = wr.athena.read_sql_table(table, database) - assert len(df2.columns) == 2 - assert len(df2.index) == 3 - assert df2.c1.astype(int).sum() == 3 - parameters = wr.catalog.get_table_parameters(database, table) - assert len(parameters) >= 5 - assert parameters["num_cols"] == "2" - assert parameters["num_rows"] == "3" - assert wr.catalog.get_table_description(database, table) == "c0+c1" - comments = wr.catalog.get_columns_comments(database, table) - assert len(comments) == len(df.columns) - assert comments["c0"] == "zero" - assert comments["c1"] == "one" + mode="overwrite", + catalog_versioning=True, + index=False, + )["paths"] + wr.s3.wait_objects_exist(paths=paths2, use_threads=False) + wr.s3.wait_objects_not_exist(paths=paths1, use_threads=False) + df = wr.athena.read_sql_table(table=table, database=database) + assert len(df.index) == 2 + assert len(df.columns) == 1 + assert str(df.c1.dtype).startswith("float") - # Round 7 - Overwrite Partitions + New Column - df = pd.DataFrame({"c0": ["bar", None], "c1": [1, 3], "c2": [True, False]}) - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, mode="overwrite_partitions", partition_cols=["c1"])[ - "paths" - ] - wr.s3.wait_objects_exist(paths=paths) - wr.s3.store_parquet_metadata( + # Version 3 (removing version 2) + df = pd.DataFrame({"c1": [True, False]}) + paths3 = wr.s3.to_csv( + df=df, path=path, dataset=True, - mode="append", database=database, table=table, - description="c0+c1+c2", - parameters={"num_cols": "3", "num_rows": "4"}, - columns_comments={"c0": "zero", "c1": "one", "c2": "two"}, + mode="overwrite", + catalog_versioning=False, + index=False, + )["paths"] + wr.s3.wait_objects_exist(paths=paths3, use_threads=False) + wr.s3.wait_objects_not_exist(paths=paths2, use_threads=False) + df = wr.athena.read_sql_table(table=table, database=database) + assert len(df.index) == 2 + assert len(df.columns) == 1 + assert str(df.c1.dtype).startswith("boolean") + + # Cleaning Up + wr.catalog.delete_table_if_exists(database=database, table=table) + wr.s3.delete_objects(path=path) + + +def test_copy_replacing_filename(bucket): + path = f"s3://{bucket}/test_copy_replacing_filename/" + wr.s3.delete_objects(path=path) + df = pd.DataFrame({"c0": [1, 2]}) + file_path = f"{path}myfile.parquet" + wr.s3.to_parquet(df=df, path=file_path) + wr.s3.wait_objects_exist(paths=[file_path], use_threads=False) + path2 = f"s3://{bucket}/test_copy_replacing_filename2/" + wr.s3.copy_objects( + paths=[file_path], source_path=path, target_path=path2, replace_filenames={"myfile.parquet": "myfile2.parquet"} ) - df2 = wr.athena.read_sql_table(table, database) - assert len(df2.columns) == 3 - assert len(df2.index) == 4 - assert df2.c1.astype(int).sum() == 6 - parameters = wr.catalog.get_table_parameters(database, table) - assert 
len(parameters) >= 5 - assert parameters["num_cols"] == "3" - assert parameters["num_rows"] == "4" - assert wr.catalog.get_table_description(database, table) == "c0+c1+c2" - comments = wr.catalog.get_columns_comments(database, table) - assert len(comments) == len(df.columns) - assert comments["c0"] == "zero" - assert comments["c1"] == "one" - assert comments["c2"] == "two" - engine = wr.catalog.get_engine("aws-data-wrangler-redshift") - df3 = wr.db.read_sql_table(con=engine, table=table, schema=external_schema) - assert len(df3.columns) == 3 - assert len(df3.index) == 4 - assert df3.c1.astype(int).sum() == 6 + expected_file = f"{path2}myfile2.parquet" + wr.s3.wait_objects_exist(paths=[expected_file], use_threads=False) + objs = wr.s3.list_objects(path=path2) + assert objs[0] == expected_file + wr.s3.delete_objects(path=path) + wr.s3.delete_objects(path=path2) -@pytest.mark.parametrize("partition_cols", [None, ["c1"], ["c2"], ["c1", "c2"], ["c2", "c1"]]) -def test_to_parquet_reverse_partitions(database, table, path, partition_cols): - df = pd.DataFrame({"c0": [0, 1, 2], "c1": [3, 4, 5], "c2": [6, 7, 8]}) - paths = wr.s3.to_parquet( - df=df, path=path, dataset=True, database=database, table=table, partition_cols=partition_cols - )["paths"] +def test_unsigned_parquet(bucket, database, external_schema): + table = "test_unsigned_parquet" + path = f"s3://{bucket}/{table}/" + wr.s3.delete_objects(path=path) + df = pd.DataFrame({"c0": [0, 0, (2 ** 8) - 1], "c1": [0, 0, (2 ** 16) - 1], "c2": [0, 0, (2 ** 32) - 1]}) + df["c0"] = df.c0.astype("uint8") + df["c1"] = df.c1.astype("uint16") + df["c2"] = df.c2.astype("uint32") + paths = wr.s3.to_parquet(df=df, path=path, dataset=True, database=database, table=table, mode="overwrite")["paths"] wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df2 = wr.athena.read_sql_table(table=table, database=database) - assert df.shape == df2.shape - assert df.c0.sum() == df2.c0.sum() - assert df.c1.sum() == df2.c1.sum() - assert df.c2.sum() == df2.c2.sum() + df = wr.athena.read_sql_table(table=table, database=database) + assert df.c0.sum() == (2 ** 8) - 1 + assert df.c1.sum() == (2 ** 16) - 1 + assert df.c2.sum() == (2 ** 32) - 1 + schema = wr.s3.read_parquet_metadata(path=path)[0] + assert schema["c0"] == "smallint" + assert schema["c1"] == "int" + assert schema["c2"] == "bigint" + df = wr.s3.read_parquet(path=path) + assert df.c0.sum() == (2 ** 8) - 1 + assert df.c1.sum() == (2 ** 16) - 1 + assert df.c2.sum() == (2 ** 32) - 1 + engine = wr.catalog.get_engine("aws-data-wrangler-redshift") + df = wr.db.read_sql_table(con=engine, table=table, schema=external_schema) + assert df.c0.sum() == (2 ** 8) - 1 + assert df.c1.sum() == (2 ** 16) - 1 + assert df.c2.sum() == (2 ** 32) - 1 + + df = pd.DataFrame({"c0": [0, 0, (2 ** 64) - 1]}) + df["c0"] = df.c0.astype("uint64") + with pytest.raises(wr.exceptions.UnsupportedType): + wr.s3.to_parquet(df=df, path=path, dataset=True, database=database, table=table, mode="overwrite") + wr.s3.delete_objects(path=path) + wr.catalog.delete_table_if_exists(database=database, table=table) -def test_to_parquet_nested_append(database, table, path): + +def test_parquet_uint64(bucket): + path = f"s3://{bucket}/test_parquet_uint64/" + wr.s3.delete_objects(path=path) df = pd.DataFrame( { - "c0": [[1, 2, 3], [4, 5, 6]], - "c1": [[[1, 2], [3, 4]], [[5, 6], [7, 8]]], - "c2": [[["a", "b"], ["c", "d"]], [["e", "f"], ["g", "h"]]], - "c3": [[], [[[[[[[[1]]]]]]]]], - "c4": [{"a": 1}, {"a": 1}], - "c5": [{"a": {"b": {"c": [1, 2]}}}, {"a": {"b": 
{"c": [3, 4]}}}], + "c0": [0, 0, (2 ** 8) - 1], + "c1": [0, 0, (2 ** 16) - 1], + "c2": [0, 0, (2 ** 32) - 1], + "c3": [0, 0, (2 ** 64) - 1], + "c4": [0, 1, 2], } ) - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, database=database, table=table)["paths"] - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df2 = wr.athena.read_sql_query(sql=f"SELECT c0, c1, c2, c4 FROM {table}", database=database) - assert len(df2.index) == 2 - assert len(df2.columns) == 4 - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, database=database, table=table)["paths"] + print(df) + df["c0"] = df.c0.astype("uint8") + df["c1"] = df.c1.astype("uint16") + df["c2"] = df.c2.astype("uint32") + df["c3"] = df.c3.astype("uint64") + paths = wr.s3.to_parquet(df=df, path=path, dataset=True, mode="overwrite", partition_cols=["c4"])["paths"] wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df2 = wr.athena.read_sql_query(sql=f"SELECT c0, c1, c2, c4 FROM {table}", database=database) - assert len(df2.index) == 4 - assert len(df2.columns) == 4 + df = wr.s3.read_parquet(path=path, dataset=True) + print(df) + print(df.dtypes) + assert len(df.index) == 3 + assert len(df.columns) == 5 + assert df.c0.max() == (2 ** 8) - 1 + assert df.c1.max() == (2 ** 16) - 1 + assert df.c2.max() == (2 ** 32) - 1 + assert df.c3.max() == (2 ** 64) - 1 + assert df.c4.astype("uint8").sum() == 3 + wr.s3.delete_objects(path=path) + +def test_parquet_overwrite_partition_cols(path, database, table, external_schema): + df = pd.DataFrame({"c0": [1, 2, 1, 2], "c1": [1, 2, 1, 2], "c2": [2, 1, 2, 1]}) -def test_to_parquet_nested_cast(database, table, path): - df = pd.DataFrame({"c0": [[1, 2, 3], [4, 5, 6]], "c1": [[], []], "c2": [{"a": 1, "b": 2}, {"a": 3, "b": 4}]}) paths = wr.s3.to_parquet( - df=df, + df=df, path=path, dataset=True, database=database, table=table, mode="overwrite", partition_cols=["c2"] + )["paths"] + wr.s3.wait_objects_exist(paths=paths, use_threads=False) + df = wr.athena.read_sql_table(table=table, database=database) + assert len(df.index) == 4 + assert len(df.columns) == 3 + assert df.c0.sum() == 6 + assert df.c1.sum() == 6 + assert df.c2.sum() == 6 + + paths = wr.s3.to_parquet( + df=df, path=path, dataset=True, database=database, table=table, mode="overwrite", partition_cols=["c1", "c2"] + )["paths"] + wr.s3.wait_objects_exist(paths=paths, use_threads=False) + df = wr.athena.read_sql_table(table=table, database=database) + assert len(df.index) == 4 + assert len(df.columns) == 3 + assert df.c0.sum() == 6 + assert df.c1.sum() == 6 + assert df.c2.sum() == 6 + + engine = wr.catalog.get_engine("aws-data-wrangler-redshift") + df = wr.db.read_sql_table(con=engine, table=table, schema=external_schema) + assert len(df.index) == 4 + assert len(df.columns) == 3 + assert df.c0.sum() == 6 + assert df.c1.sum() == 6 + assert df.c2.sum() == 6 + + +def test_catalog_parameters(bucket, database): + table = "test_catalog_parameters" + path = f"s3://{bucket}/{table}/" + wr.s3.delete_objects(path=path) + wr.catalog.delete_table_if_exists(database=database, table=table) + + wr.s3.to_parquet( + df=pd.DataFrame({"c0": [1, 2]}), path=path, dataset=True, database=database, table=table, - dtype={"c0": "array", "c1": "array", "c2": "struct"}, - )["paths"] - wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df = pd.DataFrame({"c0": [[1, 2, 3], [4, 5, 6]], "c1": [["a"], ["b"]], "c2": [{"a": 1, "b": 2}, {"a": 3, "b": 4}]}) - paths = wr.s3.to_parquet(df=df, path=path, dataset=True, database=database, table=table)["paths"] + 
mode="overwrite", + parameters={"a": "1", "b": "2"}, + ) + pars = wr.catalog.get_table_parameters(database=database, table=table) + assert pars["a"] == "1" + assert pars["b"] == "2" + pars["a"] = "0" + pars["c"] = "3" + wr.catalog.upsert_table_parameters(parameters=pars, database=database, table=table) + pars = wr.catalog.get_table_parameters(database=database, table=table) + assert pars["a"] == "0" + assert pars["b"] == "2" + assert pars["c"] == "3" + wr.catalog.overwrite_table_parameters(parameters={"d": "4"}, database=database, table=table) + pars = wr.catalog.get_table_parameters(database=database, table=table) + assert pars.get("a") is None + assert pars.get("b") is None + assert pars.get("c") is None + assert pars["d"] == "4" + df = wr.athena.read_sql_table(table=table, database=database) + assert len(df.index) == 2 + assert len(df.columns) == 1 + assert df.c0.sum() == 3 + + wr.s3.to_parquet( + df=pd.DataFrame({"c0": [3, 4]}), + path=path, + dataset=True, + database=database, + table=table, + mode="append", + parameters={"e": "5"}, + ) + pars = wr.catalog.get_table_parameters(database=database, table=table) + assert pars.get("a") is None + assert pars.get("b") is None + assert pars.get("c") is None + assert pars["d"] == "4" + assert pars["e"] == "5" + df = wr.athena.read_sql_table(table=table, database=database) + assert len(df.index) == 4 + assert len(df.columns) == 1 + assert df.c0.sum() == 10 + + wr.s3.delete_objects(path=path) + wr.catalog.delete_table_if_exists(database=database, table=table) + + +def test_metadata_partitions(path): + path = f"{path}0.parquet" + df = pd.DataFrame({"c0": [0, 1, 2], "c1": ["3", "4", "5"], "c2": [6.0, 7.0, 8.0]}) + paths = wr.s3.to_parquet(df=df, path=path, dataset=False)["paths"] wr.s3.wait_objects_exist(paths=paths, use_threads=False) - df2 = wr.athena.read_sql_query(sql=f"SELECT c0, c2 FROM {table}", database=database) - assert len(df2.index) == 4 - assert len(df2.columns) == 2 + columns_types, partitions_types = wr.s3.read_parquet_metadata(path=path, dataset=False) + assert len(columns_types) == len(df.columns) + assert columns_types.get("c0") == "bigint" + assert columns_types.get("c1") == "string" + assert columns_types.get("c2") == "double" diff --git a/testing/test_awswrangler/test_data_lake2.py b/testing/test_awswrangler/test_data_lake2.py new file mode 100644 index 000000000..cbb78fb41 --- /dev/null +++ b/testing/test_awswrangler/test_data_lake2.py @@ -0,0 +1,425 @@ +import itertools +import logging + +import boto3 +import pandas as pd +import pytest + +import awswrangler as wr + +from ._utils import dt, extract_cloudformation_outputs, get_time_str_with_random_suffix, path_generator, ts + +logging.basicConfig(level=logging.INFO, format="[%(asctime)s][%(levelname)s][%(name)s][%(funcName)s] %(message)s") +logging.getLogger("awswrangler").setLevel(logging.DEBUG) +logging.getLogger("botocore.credentials").setLevel(logging.CRITICAL) + + +@pytest.fixture(scope="module") +def cloudformation_outputs(): + yield extract_cloudformation_outputs() + + +@pytest.fixture(scope="module") +def region(cloudformation_outputs): + yield cloudformation_outputs["Region"] + + +@pytest.fixture(scope="module") +def database(cloudformation_outputs): + yield cloudformation_outputs["GlueDatabaseName"] + + +@pytest.fixture(scope="module") +def external_schema(cloudformation_outputs, database): + region = cloudformation_outputs.get("Region") + sql = f""" + CREATE EXTERNAL SCHEMA IF NOT EXISTS aws_data_wrangler_external FROM data catalog + DATABASE '{database}' + 
IAM_ROLE '{cloudformation_outputs["RedshiftRole"]}'
+    REGION '{region}';
+    """
+    engine = wr.catalog.get_engine(connection="aws-data-wrangler-redshift")
+    with engine.connect() as con:
+        con.execute(sql)
+    yield "aws_data_wrangler_external"
+
+
+@pytest.fixture(scope="function")
+def path(cloudformation_outputs):
+    yield from path_generator(cloudformation_outputs["BucketName"])
+
+
+@pytest.fixture(scope="function")
+def table(database):
+    name = f"tbl_{get_time_str_with_random_suffix()}"
+    print(f"Table name: {name}")
+    wr.catalog.delete_table_if_exists(database=database, table=name)
+    yield name
+    wr.catalog.delete_table_if_exists(database=database, table=name)
+
+
+@pytest.mark.parametrize("partition_cols", [None, ["c2"], ["c1", "c2"]])
+def test_metadata_partitions_dataset(path, partition_cols):
+    df = pd.DataFrame({"c0": [0, 1, 2], "c1": [3, 4, 5], "c2": [6, 7, 8]})
+    paths = wr.s3.to_parquet(df=df, path=path, dataset=True, partition_cols=partition_cols)["paths"]
+    wr.s3.wait_objects_exist(paths=paths, use_threads=False)
+    columns_types, partitions_types = wr.s3.read_parquet_metadata(path=path, dataset=True)
+    partitions_types = partitions_types if partitions_types is not None else {}
+    assert len(columns_types) + len(partitions_types) == len(df.columns)
+    assert columns_types.get("c0") == "bigint"
+    assert (columns_types.get("c1") == "bigint") or (partitions_types.get("c1") == "string")
+    assert (columns_types.get("c2") == "bigint") or (partitions_types.get("c2") == "string")
+
+
+@pytest.mark.parametrize("partition_cols", [None, ["c2"], ["c1", "c2"]])
+def test_store_metadata_partitions_dataset(database, table, path, partition_cols):
+    df = pd.DataFrame({"c0": [0, 1, 2], "c1": [3, 4, 5], "c2": [6, 7, 8]})
+    paths = wr.s3.to_parquet(df=df, path=path, dataset=True, partition_cols=partition_cols)["paths"]
+    wr.s3.wait_objects_exist(paths=paths, use_threads=False)
+    wr.s3.store_parquet_metadata(path=path, database=database, table=table, dataset=True)
+    df2 = wr.athena.read_sql_table(table=table, database=database)
+    assert len(df.index) == len(df2.index)
+    assert len(df.columns) == len(df2.columns)
+    assert df.c0.sum() == df2.c0.sum()
+    assert df.c1.sum() == df2.c1.astype(int).sum()
+    assert df.c2.sum() == df2.c2.astype(int).sum()
+
+
+def test_json_chunksize(path):
+    num_files = 10
+    df = pd.DataFrame({"id": [1, 2, 3], "value": ["foo", "boo", "bar"]})
+    paths = [f"{path}{i}.json" for i in range(num_files)]
+    for p in paths:
+        wr.s3.to_json(df, p, orient="records", lines=True)
+    wr.s3.wait_objects_exist(paths)
+    dfs = list(wr.s3.read_json(paths, lines=True, chunksize=1))
+    assert len(dfs) == (3 * num_files)
+    for d in dfs:
+        assert len(d.columns) == 2
+        assert d.id.iloc[0] in (1, 2, 3)
+        assert d.value.iloc[0] in ("foo", "boo", "bar")
+
+
+def test_parquet_cast_string(path):
+    df = pd.DataFrame({"id": [1, 2, 3], "value": ["foo", "boo", "bar"]})
+    path_file = f"{path}0.parquet"
+    wr.s3.to_parquet(df, path_file, dtype={"id": "string"})
+    wr.s3.wait_objects_exist([path_file])
+    df2 = wr.s3.read_parquet(path_file)
+    assert str(df2.id.dtypes) == "string"
+    df2["id"] = df2["id"].astype(int)
+    assert df.shape == df2.shape
+    for col, row in tuple(itertools.product(df.columns, range(3))):
+        assert df[col].iloc[row] == df2[col].iloc[row]
+
+
+@pytest.mark.parametrize("partition_cols", [None, ["c2"], ["value", "c2"]])
+def test_parquet_cast_string_dataset(path, partition_cols):
+    df = pd.DataFrame({"id": [1, 2, 3], "value": ["foo", "boo", "bar"], "c2": [4, 5, 6], "c3": [7.0, 8.0, 9.0]})
+    paths =
wr.s3.to_parquet( + df, path, dataset=True, partition_cols=partition_cols, dtype={"id": "string", "c3": "string"} + )["paths"] + wr.s3.wait_objects_exist(paths) + df2 = wr.s3.read_parquet(path, dataset=True).sort_values("id", ignore_index=True) + assert str(df2.id.dtypes) == "string" + assert str(df2.c3.dtypes) == "string" + df2["id"] = df2["id"].astype(int) + df2["c3"] = df2["c3"].astype(float) + assert df.shape == df2.shape + for col, row in tuple(itertools.product(df.columns, range(3))): + assert df[col].iloc[row] == df2[col].iloc[row] + + +@pytest.mark.parametrize("partition_cols", [None, ["c2"], ["c1", "c2"]]) +def test_store_metadata_partitions_sample_dataset(database, table, path, partition_cols): + num_files = 10 + df = pd.DataFrame({"c0": [0, 1, 2], "c1": [3, 4, 5], "c2": [6, 7, 8]}) + for _ in range(num_files): + paths = wr.s3.to_parquet(df=df, path=path, dataset=True, partition_cols=partition_cols)["paths"] + wr.s3.wait_objects_exist(paths=paths, use_threads=False) + wr.s3.store_parquet_metadata( + path=path, database=database, table=table, dtype={"c1": "bigint", "c2": "smallint"}, sampling=0.25, dataset=True + ) + df2 = wr.athena.read_sql_table(table=table, database=database) + assert len(df.index) * num_files == len(df2.index) + assert len(df.columns) == len(df2.columns) + assert df.c0.sum() * num_files == df2.c0.sum() + assert df.c1.sum() * num_files == df2.c1.sum() + assert df.c2.sum() * num_files == df2.c2.sum() + + +def test_athena_undefined_column(database): + with pytest.raises(wr.exceptions.InvalidArgumentValue): + wr.athena.read_sql_query("SELECT 1", database) + with pytest.raises(wr.exceptions.InvalidArgumentValue): + wr.athena.read_sql_query("SELECT NULL AS my_null", database) + + +def test_to_parquet_file_sanitize(path): + df = pd.DataFrame({"C0": [0, 1], "camelCase": [2, 3], "c**--2": [4, 5]}) + path_file = f"{path}0.parquet" + wr.s3.to_parquet(df, path_file) + wr.s3.wait_objects_exist([path_file]) + df2 = wr.s3.read_parquet(path_file) + assert df.shape == df2.shape + assert list(df2.columns) == ["c0", "camel_case", "c_2"] + assert df2.c0.sum() == 1 + assert df2.camel_case.sum() == 5 + assert df2.c_2.sum() == 9 + + +@pytest.mark.parametrize("partition_cols", [None, ["c1"], ["c2"], ["c1", "c2"], ["c2", "c1"]]) +def test_to_parquet_reverse_partitions(database, table, path, partition_cols): + df = pd.DataFrame({"c0": [0, 1, 2], "c1": [3, 4, 5], "c2": [6, 7, 8]}) + paths = wr.s3.to_parquet( + df=df, path=path, dataset=True, database=database, table=table, partition_cols=partition_cols + )["paths"] + wr.s3.wait_objects_exist(paths=paths, use_threads=False) + df2 = wr.athena.read_sql_table(table=table, database=database) + assert df.shape == df2.shape + assert df.c0.sum() == df2.c0.sum() + assert df.c1.sum() == df2.c1.sum() + assert df.c2.sum() == df2.c2.sum() + + +def test_to_parquet_nested_append(database, table, path): + df = pd.DataFrame( + { + "c0": [[1, 2, 3], [4, 5, 6]], + "c1": [[[1, 2], [3, 4]], [[5, 6], [7, 8]]], + "c2": [[["a", "b"], ["c", "d"]], [["e", "f"], ["g", "h"]]], + "c3": [[], [[[[[[[[1]]]]]]]]], + "c4": [{"a": 1}, {"a": 1}], + "c5": [{"a": {"b": {"c": [1, 2]}}}, {"a": {"b": {"c": [3, 4]}}}], + } + ) + paths = wr.s3.to_parquet(df=df, path=path, dataset=True, database=database, table=table)["paths"] + wr.s3.wait_objects_exist(paths=paths, use_threads=False) + df2 = wr.athena.read_sql_query(sql=f"SELECT c0, c1, c2, c4 FROM {table}", database=database) + assert len(df2.index) == 2 + assert len(df2.columns) == 4 + paths = wr.s3.to_parquet(df=df, 
+    paths = wr.s3.to_parquet(df=df, path=path, dataset=True, database=database, table=table)["paths"]
+    wr.s3.wait_objects_exist(paths=paths, use_threads=False)
+    df2 = wr.athena.read_sql_query(sql=f"SELECT c0, c1, c2, c4 FROM {table}", database=database)
+    assert len(df2.index) == 4
+    assert len(df2.columns) == 4
+
+
+def test_to_parquet_nested_cast(database, table, path):
+    df = pd.DataFrame({"c0": [[1, 2, 3], [4, 5, 6]], "c1": [[], []], "c2": [{"a": 1, "b": 2}, {"a": 3, "b": 4}]})
+    paths = wr.s3.to_parquet(
+        df=df,
+        path=path,
+        dataset=True,
+        database=database,
+        table=table,
+        dtype={"c0": "array", "c1": "array", "c2": "struct"},
+    )["paths"]
+    wr.s3.wait_objects_exist(paths=paths, use_threads=False)
+    df = pd.DataFrame({"c0": [[1, 2, 3], [4, 5, 6]], "c1": [["a"], ["b"]], "c2": [{"a": 1, "b": 2}, {"a": 3, "b": 4}]})
+    paths = wr.s3.to_parquet(df=df, path=path, dataset=True, database=database, table=table)["paths"]
+    wr.s3.wait_objects_exist(paths=paths, use_threads=False)
+    df2 = wr.athena.read_sql_query(sql=f"SELECT c0, c2 FROM {table}", database=database)
+    assert len(df2.index) == 4
+    assert len(df2.columns) == 2
+
+
+@pytest.mark.parametrize(
+    "encoding,strings,wrong_encoding,exception",
+    [
+        ("utf-8", ["漢字", "ãóú", "г, д, ж, з, к, л"], "ISO-8859-1", AssertionError),
+        ("ISO-8859-1", ["Ö, ö, Ü, ü", "ãóú", "øe"], "utf-8", UnicodeDecodeError),
+        ("ISO-8859-1", ["Ö, ö, Ü, ü", "ãóú", "øe"], None, UnicodeDecodeError),
+    ],
+)
+@pytest.mark.parametrize("line_terminator", ["\n", "\r"])
+def test_csv_encoding(path, encoding, strings, wrong_encoding, exception, line_terminator):
+    file_path = f"{path}0.csv"
+    df = pd.DataFrame({"c0": [1, 2, 3], "c1": strings})
+    wr.s3.to_csv(df, file_path, index=False, encoding=encoding, line_terminator=line_terminator)
+    wr.s3.wait_objects_exist(paths=[file_path])
+    df2 = wr.s3.read_csv(file_path, encoding=encoding, lineterminator=line_terminator)
+    assert df.equals(df2)
+    with pytest.raises(exception):
+        df2 = wr.s3.read_csv(file_path, encoding=wrong_encoding)
+        assert df.equals(df2)
+
+
+def test_to_parquet_file_dtype(path):
+    df = pd.DataFrame({"c0": [1.0, None, 2.0], "c1": [pd.NA, pd.NA, pd.NA]})
+    file_path = f"{path}0.parquet"
+    wr.s3.to_parquet(df, file_path, dtype={"c0": "bigint", "c1": "string"})
+    wr.s3.wait_objects_exist(paths=[file_path])
+    df2 = wr.s3.read_parquet(file_path)
+    assert df2.shape == df.shape
+    assert df2.c0.sum() == 3
+    assert str(df2.c0.dtype) == "Int64"
+    assert str(df2.c1.dtype) == "string"
+
+
+def test_to_parquet_projection_integer(database, table, path):
+    df = pd.DataFrame({"c0": [0, 1, 2], "c1": [0, 1, 2], "c2": [0, 100, 200], "c3": [0, 1, 2]})
+    paths = wr.s3.to_parquet(
+        df=df,
+        path=path,
+        dataset=True,
+        database=database,
+        table=table,
+        partition_cols=["c1", "c2", "c3"],
+        regular_partitions=False,
+        projection_enabled=True,
+        projection_types={"c1": "integer", "c2": "integer", "c3": "integer"},
+        projection_ranges={"c1": "0,2", "c2": "0,200", "c3": "0,2"},
+        projection_intervals={"c2": "100"},
+        projection_digits={"c3": "1"},
+    )["paths"]
+    wr.s3.wait_objects_exist(paths=paths, use_threads=False)
+    df2 = wr.athena.read_sql_table(table, database)
+    assert df.shape == df2.shape
+    assert df.c0.sum() == df2.c0.sum()
+    assert df.c1.sum() == df2.c1.sum()
+    assert df.c2.sum() == df2.c2.sum()
+    assert df.c3.sum() == df2.c3.sum()
+
+
+def test_to_parquet_projection_enum(database, table, path):
+    df = pd.DataFrame({"c0": [0, 1, 2], "c1": [1, 2, 3], "c2": ["foo", "boo", "bar"]})
+    paths = wr.s3.to_parquet(
+        df=df,
+        path=path,
+        dataset=True,
+        database=database,
+        table=table,
+        partition_cols=["c1", "c2"],
+        regular_partitions=False,
+        projection_enabled=True,
+        projection_types={"c1": "enum", "c2": "enum"},
+        projection_values={"c1": "1,2,3", "c2": "foo,boo,bar"},
+    )["paths"]
+    wr.s3.wait_objects_exist(paths=paths, use_threads=False)
+    df2 = wr.athena.read_sql_table(table, database)
+    assert df.shape == df2.shape
+    assert df.c0.sum() == df2.c0.sum()
+    assert df.c1.sum() == df2.c1.sum()
+
+
+def test_to_parquet_projection_date(database, table, path):
+    df = pd.DataFrame(
+        {
+            "c0": [0, 1, 2],
+            "c1": [dt("2020-01-01"), dt("2020-01-02"), dt("2020-01-03")],
+            "c2": [ts("2020-01-01 01:01:01.0"), ts("2020-01-01 01:01:02.0"), ts("2020-01-01 01:01:03.0")],
+        }
+    )
+    paths = wr.s3.to_parquet(
+        df=df,
+        path=path,
+        dataset=True,
+        database=database,
+        table=table,
+        partition_cols=["c1", "c2"],
+        regular_partitions=False,
+        projection_enabled=True,
+        projection_types={"c1": "date", "c2": "date"},
+        projection_ranges={"c1": "2020-01-01,2020-01-03", "c2": "2020-01-01 01:01:00,2020-01-01 01:01:03"},
+    )["paths"]
+    wr.s3.wait_objects_exist(paths=paths, use_threads=False)
+    df2 = wr.athena.read_sql_table(table, database)
+    print(df2)
+    assert df.shape == df2.shape
+    assert df.c0.sum() == df2.c0.sum()
+
+
+def test_to_parquet_projection_injected(database, table, path):
+    df = pd.DataFrame({"c0": [0, 1, 2], "c1": ["foo", "boo", "bar"], "c2": ["0", "1", "2"]})
+    paths = wr.s3.to_parquet(
+        df=df,
+        path=path,
+        dataset=True,
+        database=database,
+        table=table,
+        partition_cols=["c1", "c2"],
+        regular_partitions=False,
+        projection_enabled=True,
+        projection_types={"c1": "injected", "c2": "injected"},
+    )["paths"]
+    wr.s3.wait_objects_exist(paths=paths, use_threads=False)
+    df2 = wr.athena.read_sql_query(f"SELECT * FROM {table} WHERE c1='foo' AND c2='0'", database)
+    assert df2.shape == (1, 3)
+    assert df2.c0.iloc[0] == 0
+
+
+def test_read_parquet_filter_partitions(path):
+    df = pd.DataFrame({"c0": [0, 1, 2], "c1": [0, 1, 2], "c2": [0, 0, 1]})
+    paths = wr.s3.to_parquet(df, path, dataset=True, partition_cols=["c1", "c2"])["paths"]
+    wr.s3.wait_objects_exist(paths=paths, use_threads=False)
+    df2 = wr.s3.read_parquet(path, dataset=True, filters=[("c1", "==", "0")])
+    assert df2.shape == (1, 3)
+    assert df2.c0.iloc[0] == 0
+    assert df2.c1.iloc[0] == 0
+    assert df2.c2.iloc[0] == 0
+    df2 = wr.s3.read_parquet(path, dataset=True, filters=[("c1", "==", "1"), ("c2", "==", "0")])
+    assert df2.shape == (1, 3)
+    assert df2.c0.iloc[0] == 1
+    assert df2.c1.iloc[0] == 1
+    assert df2.c2.iloc[0] == 0
+    df2 = wr.s3.read_parquet(path, dataset=True, filters=[("c2", "==", "0")])
+    assert df2.shape == (2, 3)
+    assert df2.c0.astype(int).sum() == 1
+    assert df2.c1.astype(int).sum() == 1
+    assert df2.c2.astype(int).sum() == 0
+
+
+@pytest.mark.parametrize("use_threads", [True, False])
+@pytest.mark.parametrize("chunksize", [None, 1])
+def test_read_partitioned_json(path, use_threads, chunksize):
+    df = pd.DataFrame({"c0": [0, 1], "c1": ["foo", "boo"]})
+    paths = [f"{path}year={y}/month={m}/0.json" for y, m in [(2020, 1), (2020, 2), (2021, 1)]]
+    for p in paths:
+        wr.s3.to_json(df, p, orient="records", lines=True)
+    wr.s3.wait_objects_exist(paths, use_threads=False)
+    df2 = wr.s3.read_json(path, dataset=True, use_threads=use_threads, chunksize=chunksize)
+    if chunksize is None:
+        assert df2.shape == (6, 4)
+        assert df2.c0.sum() == 3
+    else:
+        for d in df2:
+            assert d.shape == (1, 4)
+
+
+@pytest.mark.parametrize("use_threads", [True, False])
+@pytest.mark.parametrize("chunksize", [None, 1])
+def test_read_partitioned_csv(path, use_threads, chunksize):
+    df = pd.DataFrame({"c0": [0, 1], "c1": ["foo", "boo"]})
+    paths = [f"{path}year={y}/month={m}/0.csv" for y, m in [(2020, 1), (2020, 2), (2021, 1)]]
+    for p in paths:
+        wr.s3.to_csv(df, p, index=False)
+    wr.s3.wait_objects_exist(paths, use_threads=False)
+    df2 = wr.s3.read_csv(path, dataset=True, use_threads=use_threads, chunksize=chunksize)
+    if chunksize is None:
+        assert df2.shape == (6, 4)
+        assert df2.c0.sum() == 3
+    else:
+        for d in df2:
+            assert d.shape == (1, 4)
+
+
+@pytest.mark.parametrize("use_threads", [True, False])
+@pytest.mark.parametrize("chunksize", [None, 1])
+def test_read_partitioned_fwf(path, use_threads, chunksize):
+    text = "0foo\n1boo"
+    client_s3 = boto3.client("s3")
+    paths = [f"{path}year={y}/month={m}/0.csv" for y, m in [(2020, 1), (2020, 2), (2021, 1)]]
+    for p in paths:
+        bucket, key = wr._utils.parse_path(p)
+        client_s3.put_object(Body=text, Bucket=bucket, Key=key)
+    wr.s3.wait_objects_exist(paths, use_threads=False)
+    df2 = wr.s3.read_fwf(
+        path, dataset=True, use_threads=use_threads, chunksize=chunksize, widths=[1, 3], names=["c0", "c1"]
+    )
+    if chunksize is None:
+        assert df2.shape == (6, 4)
+        assert df2.c0.sum() == 3
+    else:
+        for d in df2:
+            assert d.shape == (1, 4)
diff --git a/testing/test_awswrangler/test_db.py b/testing/test_awswrangler/test_db.py
index 1e07f8bf9..4ff1e68ed 100644
--- a/testing/test_awswrangler/test_db.py
+++ b/testing/test_awswrangler/test_db.py
@@ -9,7 +9,13 @@
 import awswrangler as wr

-from ._utils import CFN_VALID_STATUS, ensure_data_types, ensure_data_types_category, get_df, get_df_category
+from ._utils import (
+    ensure_data_types,
+    ensure_data_types_category,
+    extract_cloudformation_outputs,
+    get_df,
+    get_df_category,
+)

 logging.basicConfig(level=logging.INFO, format="[%(asctime)s][%(levelname)s][%(name)s][%(funcName)s] %(message)s")
 logging.getLogger("awswrangler").setLevel(logging.DEBUG)
@@ -18,12 +24,7 @@
 @pytest.fixture(scope="module")
 def cloudformation_outputs():
-    response = boto3.client("cloudformation").describe_stacks(StackName="aws-data-wrangler")
-    stack = [x for x in response.get("Stacks") if x["StackStatus"] in CFN_VALID_STATUS][0]
-    outputs = {}
-    for output in stack.get("Outputs"):
-        outputs[output.get("OutputKey")] = output.get("OutputValue")
-    yield outputs
+    yield extract_cloudformation_outputs()


 @pytest.fixture(scope="module")
diff --git a/testing/test_awswrangler/test_emr.py b/testing/test_awswrangler/test_emr.py
index e414fc2e5..f67150f23 100644
--- a/testing/test_awswrangler/test_emr.py
+++ b/testing/test_awswrangler/test_emr.py
@@ -1,12 +1,11 @@
 import logging
 import time

-import boto3
 import pytest

 import awswrangler as wr

-from ._utils import CFN_VALID_STATUS
+from ._utils import extract_cloudformation_outputs

 logging.basicConfig(level=logging.INFO, format="[%(asctime)s][%(levelname)s][%(name)s][%(funcName)s] %(message)s")
 logging.getLogger("awswrangler").setLevel(logging.DEBUG)
@@ -15,12 +14,7 @@
 @pytest.fixture(scope="module")
 def cloudformation_outputs():
-    response = boto3.client("cloudformation").describe_stacks(StackName="aws-data-wrangler")
-    stack = [x for x in response.get("Stacks") if x["StackStatus"] in CFN_VALID_STATUS][0]
-    outputs = {}
-    for output in stack.get("Outputs"):
-        outputs[output.get("OutputKey")] = output.get("OutputValue")
-    yield outputs
+    yield extract_cloudformation_outputs()


 @pytest.fixture(scope="module")
diff --git a/testing/test_awswrangler/test_metadata.py b/testing/test_awswrangler/test_metadata.py
index 3dff3f55a..c8f0bc067 100644
--- a/testing/test_awswrangler/test_metadata.py
+++ b/testing/test_awswrangler/test_metadata.py
@@ -2,7 +2,7 @@
 def test_metadata():
-    assert wr.__version__ == "1.3.0"
+    assert wr.__version__ == "1.4.0"
     assert wr.__title__ == "awswrangler"
     assert wr.__description__ == "Pandas on AWS."
     assert wr.__license__ == "Apache License 2.0"
diff --git a/testing/test_awswrangler/test_moto.py b/testing/test_awswrangler/test_moto.py
index 01bde208f..71367f730 100644
--- a/testing/test_awswrangler/test_moto.py
+++ b/testing/test_awswrangler/test_moto.py
@@ -1,11 +1,15 @@
+from unittest.mock import ANY
+
 import boto3
 import botocore
 import mock
 import moto
+import pandas as pd
 import pytest
 from botocore.exceptions import ClientError

 import awswrangler as wr
+from awswrangler.exceptions import EmptyDataFrame, InvalidArgumentCombination

 from ._utils import ensure_data_types, get_df_csv, get_df_list
@@ -217,6 +221,71 @@ def test_csv(s3):
     assert len(df.columns) == 10


+@mock.patch("pandas.read_csv")
+@mock.patch("s3fs.S3FileSystem.open")
+def test_read_csv_pass_pandas_arguments_and_encoding_succeed(mock_open, mock_read_csv, s3):
+    bucket = "bucket"
+    key = "foo/foo.csv"
+    path = "s3://{}/{}".format(bucket, key)
+    s3_object = s3.Object(bucket, key)
+    s3_object.put(Body=b"foo")
+
+    with pytest.raises(TypeError):
+        wr.s3.read_csv(path=path, encoding="ISO-8859-1", sep=",", lineterminator="\r\n")
+    mock_open.assert_called_with(path="s3://bucket/foo/foo.csv", mode="r", encoding="ISO-8859-1", newline="\r\n")
+    mock_read_csv.assert_called_with(ANY, compression=None, encoding="ISO-8859-1", sep=",", lineterminator="\r\n")
+
+
+def test_to_csv_invalid_argument_combination_raise_when_dataset_false_succeed(s3):
+    path = "s3://bucket/test.csv"
+    with pytest.raises(InvalidArgumentCombination):
+        wr.s3.to_csv(df=get_df_csv(), path=path, index=False, database="foo")
+
+    with pytest.raises(InvalidArgumentCombination):
+        wr.s3.to_csv(df=get_df_csv(), path=path, index=False, table="foo")
+
+    with pytest.raises(InvalidArgumentCombination):
+        wr.s3.to_csv(df=get_df_csv(), path=path, index=False, dataset=False, partition_cols=["par0", "par1"])
+
+    with pytest.raises(InvalidArgumentCombination):
+        wr.s3.to_csv(df=get_df_csv(), path=path, index=False, dataset=False, mode="append")
+
+    with pytest.raises(InvalidArgumentCombination):
+        wr.s3.to_csv(df=get_df_csv(), path=path, index=False, dataset=False, partition_cols=["par0", "par1"])
+
+    with pytest.raises(InvalidArgumentCombination):
+        wr.s3.to_csv(df=get_df_csv(), path=path, index=False, dataset=False, database="default", table="test")
+
+    with pytest.raises(InvalidArgumentCombination):
+        wr.s3.to_csv(df=get_df_csv(), path=path, index=False, dataset=False, description="raise exception")
+
+    with pytest.raises(InvalidArgumentCombination):
+        wr.s3.to_csv(df=get_df_csv(), path=path, index=False, dataset=False, parameters={"key": "value"})
+
+    with pytest.raises(InvalidArgumentCombination):
+        wr.s3.to_csv(df=get_df_csv(), path=path, index=False, dataset=False, columns_comments={"col0": "test"})
+
+
+def test_to_csv_valid_argument_combination_when_dataset_true_succeed(s3):
+    path = "s3://bucket/test.csv"
+    wr.s3.to_csv(df=get_df_csv(), path=path, index=False)
+    wr.s3.to_csv(df=get_df_csv(), path=path, index=False, dataset=True, partition_cols=["par0", "par1"])
+
+    wr.s3.to_csv(df=get_df_csv(), path=path, index=False, dataset=True, mode="append")
+
+    wr.s3.to_csv(df=get_df_csv(), path=path, index=False, dataset=True, description="raise exception")
+
+    wr.s3.to_csv(df=get_df_csv(), path=path, index=False, dataset=True, parameters={"key": "value"})
+
+    wr.s3.to_csv(df=get_df_csv(), path=path, index=False, dataset=True, columns_comments={"col0": "test"})
+
+
+def test_to_csv_data_empty_raise_succeed(s3):
+    path = "s3://bucket/test.csv"
+    with pytest.raises(EmptyDataFrame):
+        wr.s3.to_csv(df=pd.DataFrame(), path=path, index=False)
+
+
 def test_parquet(s3):
     path = "s3://bucket/test.parquet"
     wr.s3.to_parquet(df=get_df_list(), path=path, index=False, dataset=True, partition_cols=["par0", "par1"])
diff --git a/testing/validations.sh b/testing/validations.sh
index d32fc7808..db65119b2 100755
--- a/testing/validations.sh
+++ b/testing/validations.sh
@@ -7,8 +7,8 @@ cfn-flip -c -l -n cloudformation.yaml temp.yaml
 cfn-lint -t temp.yaml
 mv temp.yaml cloudformation.yaml
 pushd ..
+isort -rc awswrangler testing/test_awswrangler
 black --line-length 120 --target-version py36 awswrangler testing/test_awswrangler
-isort -rc --line-width 120 awswrangler testing/test_awswrangler
 pydocstyle awswrangler/ --add-ignore=D204,D403
 mypy awswrangler
 flake8 setup.py awswrangler testing/test_awswrangler
diff --git a/tutorials/01 - Introduction.ipynb b/tutorials/001 - Introduction.ipynb
similarity index 100%
rename from tutorials/01 - Introduction.ipynb
rename to tutorials/001 - Introduction.ipynb
diff --git a/tutorials/02 - Sessions.ipynb b/tutorials/002 - Sessions.ipynb
similarity index 100%
rename from tutorials/02 - Sessions.ipynb
rename to tutorials/002 - Sessions.ipynb
diff --git a/tutorials/03 - Amazon S3.ipynb b/tutorials/003 - Amazon S3.ipynb
similarity index 100%
rename from tutorials/03 - Amazon S3.ipynb
rename to tutorials/003 - Amazon S3.ipynb
diff --git a/tutorials/04 - Parquet Datasets.ipynb b/tutorials/004 - Parquet Datasets.ipynb
similarity index 100%
rename from tutorials/04 - Parquet Datasets.ipynb
rename to tutorials/004 - Parquet Datasets.ipynb
diff --git a/tutorials/05 - Glue Catalog.ipynb b/tutorials/005 - Glue Catalog.ipynb
similarity index 100%
rename from tutorials/05 - Glue Catalog.ipynb
rename to tutorials/005 - Glue Catalog.ipynb
diff --git a/tutorials/06 - Amazon Athena.ipynb b/tutorials/006 - Amazon Athena.ipynb
similarity index 100%
rename from tutorials/06 - Amazon Athena.ipynb
rename to tutorials/006 - Amazon Athena.ipynb
diff --git a/tutorials/07 - Redshift, MySQL, PostgreSQL.ipynb b/tutorials/007 - Redshift, MySQL, PostgreSQL.ipynb
similarity index 100%
rename from tutorials/07 - Redshift, MySQL, PostgreSQL.ipynb
rename to tutorials/007 - Redshift, MySQL, PostgreSQL.ipynb
diff --git a/tutorials/08 - Redshift - Copy & Unload.ipynb b/tutorials/008 - Redshift - Copy & Unload.ipynb
similarity index 100%
rename from tutorials/08 - Redshift - Copy & Unload.ipynb
rename to tutorials/008 - Redshift - Copy & Unload.ipynb
diff --git a/tutorials/09 - Redshift - Append, Overwrite, Upsert.ipynb b/tutorials/009 - Redshift - Append, Overwrite, Upsert.ipynb
similarity index 100%
rename from tutorials/09 - Redshift - Append, Overwrite, Upsert.ipynb
rename to tutorials/009 - Redshift - Append, Overwrite, Upsert.ipynb
diff --git a/tutorials/10 - Parquet Crawler.ipynb b/tutorials/010 - Parquet Crawler.ipynb
similarity index 100%
rename from tutorials/10 - Parquet Crawler.ipynb
rename to tutorials/010 - Parquet Crawler.ipynb
diff --git a/tutorials/11 - CSV Datasets.ipynb b/tutorials/011 - CSV Datasets.ipynb
similarity index 100%
rename from tutorials/11 - CSV Datasets.ipynb
rename to tutorials/011 - CSV Datasets.ipynb
diff --git a/tutorials/12 - CSV Crawler.ipynb b/tutorials/012 - CSV Crawler.ipynb
similarity index 99%
rename from tutorials/12 - CSV Crawler.ipynb
rename to tutorials/012 - CSV Crawler.ipynb
index d3e4bd710..68973a424 100644
--- a/tutorials/12 - CSV Crawler.ipynb
+++ b/tutorials/012 - CSV Crawler.ipynb
@@ -478,7 +478,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## You can also extract the metadata directly from the Catalog with you want"
+    "## You can also extract the metadata directly from the Catalog if you want"
    ]
   },
   {
@@ -691,17 +691,8 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.6.5"
-  },
-  "pycharm": {
-   "stem_cell": {
-    "cell_type": "raw",
-    "metadata": {
-     "collapsed": false
-    },
-    "source": []
-   }
   }
  },
 "nbformat": 4,
 "nbformat_minor": 4
-}
+}
\ No newline at end of file
diff --git a/tutorials/13 - Merging Datasets on S3.ipynb b/tutorials/013 - Merging Datasets on S3.ipynb
similarity index 100%
rename from tutorials/13 - Merging Datasets on S3.ipynb
rename to tutorials/013 - Merging Datasets on S3.ipynb
diff --git a/tutorials/14 - Schema Evolution.ipynb b/tutorials/014 - Schema Evolution.ipynb
similarity index 100%
rename from tutorials/14 - Schema Evolution.ipynb
rename to tutorials/014 - Schema Evolution.ipynb
diff --git a/tutorials/15 - EMR.ipynb b/tutorials/015 - EMR.ipynb
similarity index 100%
rename from tutorials/15 - EMR.ipynb
rename to tutorials/015 - EMR.ipynb
diff --git a/tutorials/16 - EMR & Docker.ipynb b/tutorials/016 - EMR & Docker.ipynb
similarity index 100%
rename from tutorials/16 - EMR & Docker.ipynb
rename to tutorials/016 - EMR & Docker.ipynb
diff --git a/tutorials/17 - Partition Projection.ipynb b/tutorials/017 - Partition Projection.ipynb
similarity index 100%
rename from tutorials/17 - Partition Projection.ipynb
rename to tutorials/017 - Partition Projection.ipynb