diff --git a/.github/workflows/static-checking.yml b/.github/workflows/static-checking.yml
index c1d9ff0cb..a23a74d99 100644
--- a/.github/workflows/static-checking.yml
+++ b/.github/workflows/static-checking.yml
@@ -24,15 +24,12 @@ jobs:
         uses: actions/setup-python@v1
         with:
           python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install -r requirements.txt
-          pip install -r requirements-dev.txt
+      - name: Setup Environment
+        run: ./setup-dev-env.sh
       - name: CloudFormation Lint
         run: cfn-lint -t testing/cloudformation.yaml
       - name: Documentation Lint
-        run: pydocstyle awswrangler/ --add-ignore=D204
+        run: pydocstyle awswrangler/ --add-ignore=D204,D403
       - name: mypy check
         run: mypy awswrangler
       - name: Flake8 Lint
diff --git a/.pylintrc b/.pylintrc
index 132ce213a..4f41cb3fb 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -141,7 +141,8 @@ disable=print-statement,
         comprehension-escape,
         C0330,
         C0103,
-        W1202
+        W1202,
+        too-few-public-methods
 
 # Enable the message, report, category or checker with the given id(s). You can
 # either give multiple identifier separated by comma (,) or put this option
diff --git a/README.md b/README.md
index 66095288c..ccb5dc669 100644
--- a/README.md
+++ b/README.md
@@ -84,6 +84,7 @@ df = wr.db.read_sql_query("SELECT * FROM external_schema.my_table", con=engine)
   - [11 - CSV Datasets](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/11%20-%20CSV%20Datasets.ipynb)
   - [12 - CSV Crawler](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/12%20-%20CSV%20Crawler.ipynb)
   - [13 - Merging Datasets on S3](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/13%20-%20Merging%20Datasets%20on%20S3.ipynb)
+  - [14 - PyTorch](https://github.com/awslabs/aws-data-wrangler/blob/master/tutorials/14%20-%20PyTorch.ipynb)
   - [15 - EMR](https://github.com/awslabs/aws-data-wrangler/blob/dev/tutorials/15%20-%20EMR.ipynb)
   - [16 - EMR & Docker](https://github.com/awslabs/aws-data-wrangler/blob/dev/tutorials/16%20-%20EMR%20%26%20Docker.ipynb)
 - [**API Reference**](https://aws-data-wrangler.readthedocs.io/en/latest/api.html)
diff --git a/awswrangler/__init__.py b/awswrangler/__init__.py
index 4413ab5f4..78299541e 100644
--- a/awswrangler/__init__.py
+++ b/awswrangler/__init__.py
@@ -6,9 +6,13 @@
 """
 
 import logging
+from importlib.util import find_spec
 
 from awswrangler import athena, catalog, cloudwatch, db, emr, exceptions, s3  # noqa
 from awswrangler.__metadata__ import __description__, __license__, __title__, __version__  # noqa
 from awswrangler._utils import get_account_id  # noqa
 
+if find_spec("torch") and find_spec("torchvision") and find_spec("torchaudio") and find_spec("PIL"):
+    from awswrangler import torch  # noqa
+
 logging.getLogger("awswrangler").addHandler(logging.NullHandler())
diff --git a/awswrangler/_data_types.py b/awswrangler/_data_types.py
index 62928e816..947b058b0 100644
--- a/awswrangler/_data_types.py
+++ b/awswrangler/_data_types.py
@@ -207,7 +207,7 @@ def pyarrow2sqlalchemy(  # pylint: disable=too-many-branches,too-many-return-sta
         return sqlalchemy.types.Date
     if pa.types.is_binary(dtype):
         if db_type == "redshift":
-            raise exceptions.UnsupportedType(f"Binary columns are not supported for Redshift.")  # pragma: no cover
+            raise exceptions.UnsupportedType("Binary columns are not supported for Redshift.")  # pragma: no cover
         return sqlalchemy.types.Binary
     if pa.types.is_decimal(dtype):
         return sqlalchemy.types.Numeric(precision=dtype.precision, scale=dtype.scale)
@@ -257,7 +257,7 @@ def pyarrow_types_from_pandas(
     # Filling schema
     columns_types: Dict[str, pa.DataType]
     columns_types = {n: cols_dtypes[n] for n in sorted_cols}
-    _logger.debug(f"columns_types: {columns_types}")
+    _logger.debug("columns_types: %s", columns_types)
     return columns_types
 
 
@@ -275,7 +275,7 @@ def athena_types_from_pandas(
             athena_columns_types[k] = casts[k]
         else:
             athena_columns_types[k] = pyarrow2athena(dtype=v)
-    _logger.debug(f"athena_columns_types: {athena_columns_types}")
+    _logger.debug("athena_columns_types: %s", athena_columns_types)
     return athena_columns_types
 
 
@@ -315,7 +315,7 @@ def pyarrow_schema_from_pandas(
         if (k in df.columns) and (k not in ignore):
             columns_types[k] = athena2pyarrow(v)
     columns_types = {k: v for k, v in columns_types.items() if v is not None}
-    _logger.debug(f"columns_types: {columns_types}")
+    _logger.debug("columns_types: %s", columns_types)
     return pa.schema(fields=columns_types)
 
 
@@ -324,11 +324,11 @@ def athena_types_from_pyarrow_schema(
 ) -> Tuple[Dict[str, str], Optional[Dict[str, str]]]:
     """Extract the related Athena data types from any PyArrow Schema considering possible partitions."""
     columns_types: Dict[str, str] = {str(f.name): pyarrow2athena(dtype=f.type) for f in schema}
-    _logger.debug(f"columns_types: {columns_types}")
+    _logger.debug("columns_types: %s", columns_types)
     partitions_types: Optional[Dict[str, str]] = None
     if partitions is not None:
         partitions_types = {p.name: pyarrow2athena(p.dictionary.type) for p in partitions}
-    _logger.debug(f"partitions_types: {partitions_types}")
+    _logger.debug("partitions_types: %s", partitions_types)
     return columns_types, partitions_types
 
 
@@ -382,5 +382,5 @@ def sqlalchemy_types_from_pandas(
             sqlalchemy_columns_types[k] = casts[k]
         else:
             sqlalchemy_columns_types[k] = pyarrow2sqlalchemy(dtype=v, db_type=db_type)
-    _logger.debug(f"sqlalchemy_columns_types: {sqlalchemy_columns_types}")
+    _logger.debug("sqlalchemy_columns_types: %s", sqlalchemy_columns_types)
     return sqlalchemy_columns_types
diff --git a/awswrangler/athena.py b/awswrangler/athena.py
index 4948f56dc..bd5c7cb35 100644
--- a/awswrangler/athena.py
+++ b/awswrangler/athena.py
@@ -176,8 +176,8 @@ def wait_query(query_execution_id: str, boto3_session: Optional[boto3.Session] =
         time.sleep(_QUERY_WAIT_POLLING_DELAY)
         response = client_athena.get_query_execution(QueryExecutionId=query_execution_id)
         state = response["QueryExecution"]["Status"]["State"]
-    _logger.debug(f"state: {state}")
-    _logger.debug(f"StateChangeReason: {response['QueryExecution']['Status'].get('StateChangeReason')}")
+    _logger.debug("state: %s", state)
+    _logger.debug("StateChangeReason: %s", response["QueryExecution"]["Status"].get("StateChangeReason"))
     if state == "FAILED":
         raise exceptions.QueryFailed(response["QueryExecution"]["Status"].get("StateChangeReason"))
     if state == "CANCELLED":
@@ -265,7 +265,7 @@ def _get_query_metadata(
     cols_types: Dict[str, str] = get_query_columns_types(
         query_execution_id=query_execution_id, boto3_session=boto3_session
     )
-    _logger.debug(f"cols_types: {cols_types}")
+    _logger.debug("cols_types: %s", cols_types)
     dtype: Dict[str, str] = {}
     parse_timestamps: List[str] = []
     parse_dates: List[str] = []
@@ -298,11 +298,11 @@ def _get_query_metadata(
             converters[col_name] = lambda x: Decimal(str(x)) if str(x) not in ("", "none", " ", "<NA>") else None
         else:
             dtype[col_name] = pandas_type
-    _logger.debug(f"dtype: {dtype}")
-    _logger.debug(f"parse_timestamps: {parse_timestamps}")
-    _logger.debug(f"parse_dates: {parse_dates}")
-    _logger.debug(f"converters: {converters}")
-    _logger.debug(f"binaries: {binaries}")
+    _logger.debug("dtype: %s", dtype)
+    _logger.debug("parse_timestamps: %s", parse_timestamps)
+    _logger.debug("parse_dates: %s", parse_dates)
+    _logger.debug("converters: %s", converters)
+    _logger.debug("binaries: %s", binaries)
     return dtype, parse_timestamps, parse_dates, converters, binaries
 
 
@@ -446,7 +446,7 @@ def read_sql_query(  # pylint: disable=too-many-branches,too-many-locals
             f") AS\n"
             f"{sql}"
         )
-    _logger.debug(f"sql: {sql}")
+    _logger.debug("sql: %s", sql)
     query_id: str = start_query_execution(
         sql=sql,
         database=database,
@@ -456,7 +456,7 @@ def read_sql_query(  # pylint: disable=too-many-branches,too-many-locals
         kms_key=kms_key,
         boto3_session=session,
     )
-    _logger.debug(f"query_id: {query_id}")
+    _logger.debug("query_id: %s", query_id)
     query_response: Dict[str, Any] = wait_query(query_execution_id=query_id, boto3_session=session)
     if query_response["QueryExecution"]["Status"]["State"] in ["FAILED", "CANCELLED"]:  # pragma: no cover
         reason: str = query_response["QueryExecution"]["Status"]["StateChangeReason"]
@@ -468,7 +468,7 @@ def read_sql_query(  # pylint: disable=too-many-branches,too-many-locals
         manifest_path: str = f"{_s3_output}/tables/{query_id}-manifest.csv"
         paths: List[str] = _extract_ctas_manifest_paths(path=manifest_path, boto3_session=session)
         chunked: Union[bool, int] = False if chunksize is None else chunksize
-        _logger.debug(f"chunked: {chunked}")
+        _logger.debug("chunked: %s", chunked)
         if not paths:
             if chunked is False:
                 dfs = pd.DataFrame()
@@ -485,9 +485,9 @@ def read_sql_query(  # pylint: disable=too-many-branches,too-many-locals
     )
     path = f"{_s3_output}/{query_id}.csv"
     s3.wait_objects_exist(paths=[path], use_threads=False, boto3_session=session)
-    _logger.debug(f"Start CSV reading from {path}")
+    _logger.debug("Start CSV reading from %s", path)
     _chunksize: Optional[int] = chunksize if isinstance(chunksize, int) else None
-    _logger.debug(f"_chunksize: {_chunksize}")
+    _logger.debug("_chunksize: %s", _chunksize)
     ret = s3.read_csv(
         path=[path],
         dtype=dtype,
diff --git a/awswrangler/catalog.py b/awswrangler/catalog.py
index 8a53d4370..93092626b 100644
--- a/awswrangler/catalog.py
+++ b/awswrangler/catalog.py
@@ -766,7 +766,7 @@ def drop_duplicated_columns(df: pd.DataFrame) -> pd.DataFrame:
     duplicated_cols = df.columns.duplicated()
     duplicated_cols_names: List[str] = list(df.columns[duplicated_cols])
     if len(duplicated_cols_names) > 0:
-        _logger.warning(f"Dropping repeated columns: {duplicated_cols_names}")
+        _logger.warning("Dropping repeated columns: %s", duplicated_cols_names)
     return df.loc[:, ~duplicated_cols]
 
 
@@ -967,11 +967,11 @@ def _create_table(
             if name in columns_comments:
                 par["Comment"] = columns_comments[name]
     session: boto3.Session = _utils.ensure_session(session=boto3_session)
-
-    if mode == "overwrite":
+    exist: bool = does_table_exist(database=database, table=table, boto3_session=session)
+    if (mode == "overwrite") or (exist is False):
         delete_table_if_exists(database=database, table=table, boto3_session=session)
-    client_glue: boto3.client = _utils.client(service_name="glue", session=session)
-    client_glue.create_table(DatabaseName=database, TableInput=table_input)
+        client_glue: boto3.client = _utils.client(service_name="glue", session=session)
+        client_glue.create_table(DatabaseName=database, TableInput=table_input)
 
 
 def _csv_table_definition(
diff --git a/awswrangler/cloudwatch.py b/awswrangler/cloudwatch.py
index e0a01f066..c36fab70b 100644
--- a/awswrangler/cloudwatch.py
+++ b/awswrangler/cloudwatch.py
@@ -56,11 +56,11 @@ def start_query(
     ... )
 
     """
-    _logger.debug(f"log_group_names: {log_group_names}")
+    _logger.debug("log_group_names: %s", log_group_names)
     start_timestamp: int = int(1000 * start_time.timestamp())
     end_timestamp: int = int(1000 * end_time.timestamp())
-    _logger.debug(f"start_timestamp: {start_timestamp}")
-    _logger.debug(f"end_timestamp: {end_timestamp}")
+    _logger.debug("start_timestamp: %s", start_timestamp)
+    _logger.debug("end_timestamp: %s", end_timestamp)
     args: Dict[str, Any] = {
         "logGroupNames": log_group_names,
         "startTime": start_timestamp,
@@ -109,7 +109,7 @@ def wait_query(query_id: str, boto3_session: Optional[boto3.Session] = None) ->
         time.sleep(_QUERY_WAIT_POLLING_DELAY)
         response = client_logs.get_query_results(queryId=query_id)
         status = response["status"]
-    _logger.debug(f"status: {status}")
+    _logger.debug("status: %s", status)
     if status == "Failed":  # pragma: no cover
         raise exceptions.QueryFailed(f"query ID: {query_id}")
     if status == "Cancelled":
diff --git a/awswrangler/db.py b/awswrangler/db.py
index 2c8ac2799..21b4789c4 100644
--- a/awswrangler/db.py
+++ b/awswrangler/db.py
@@ -155,29 +155,15 @@ def read_sql_query(
     ... )
 
     """
-    if not isinstance(con, sqlalchemy.engine.Engine):  # pragma: no cover
-        raise exceptions.InvalidConnection(
-            "Invalid 'con' argument, please pass a "
-            "SQLAlchemy Engine. Use wr.db.get_engine(), "
-            "wr.db.get_redshift_temp_engine() or wr.catalog.get_engine()"
-        )
+    _validate_engine(con=con)
     with con.connect() as _con:
         args = _convert_params(sql, params)
         cursor = _con.execute(*args)
         if chunksize is None:
             return _records2df(records=cursor.fetchall(), cols_names=cursor.keys(), index=index_col, dtype=dtype)
-        return _iterate_cursor(cursor=cursor, chunksize=chunksize, index=index_col, dtype=dtype)
-
-
-def _iterate_cursor(
-    cursor, chunksize: int, index: Optional[Union[str, List[str]]], dtype: Optional[Dict[str, pa.DataType]] = None
-) -> Iterator[pd.DataFrame]:
-    while True:
-        records = cursor.fetchmany(chunksize)
-        if not records:
-            break
-        df: pd.DataFrame = _records2df(records=records, cols_names=cursor.keys(), index=index, dtype=dtype)
-        yield df
+        return _iterate_cursor(
+            cursor=cursor, chunksize=chunksize, cols_names=cursor.keys(), index=index_col, dtype=dtype
+        )
 
 
 def _records2df(
@@ -207,6 +193,21 @@ def _records2df(
     return df
 
 
+def _iterate_cursor(
+    cursor: Any,
+    chunksize: int,
+    cols_names: List[str],
+    index: Optional[Union[str, List[str]]],
+    dtype: Optional[Dict[str, pa.DataType]] = None,
+) -> Iterator[pd.DataFrame]:
+    """Yield one DataFrame for each non-empty batch returned by `cursor.fetchmany(chunksize)`."""
+    # Prime the loop with the first batch; the generator finishes on the first empty batch.
+    batch = cursor.fetchmany(chunksize)
+    while batch:
+        yield _records2df(records=batch, cols_names=cols_names, index=index, dtype=dtype)
+        batch = cursor.fetchmany(chunksize)
+
+
 def _convert_params(sql: str, params: Optional[Union[List, Tuple, Dict]]) -> List[Any]:
     args: List[Any] = [sql]
     if params is not None:
@@ -646,7 +646,7 @@ def copy_files_to_redshift(  # pylint: disable=too-many-locals,too-many-argument
     athena_types, _ = s3.read_parquet_metadata(
         path=paths, dataset=False, use_threads=use_threads, boto3_session=session
     )
-    _logger.debug(f"athena_types: {athena_types}")
+    _logger.debug("athena_types: %s", athena_types)
     redshift_types: Dict[str, str] = {}
     for col_name, col_type in athena_types.items():
         length: int = _varchar_lengths[col_name] if col_name in _varchar_lengths else varchar_lengths_default
@@ -680,7 +680,7 @@ def copy_files_to_redshift(  # pylint: disable=too-many-locals,too-many-argument
 def _rs_upsert(con: Any, table: str, temp_table: str, schema: str, primary_keys: Optional[List[str]] = None) -> None:
     if not primary_keys:
         primary_keys = _rs_get_primary_keys(con=con, schema=schema, table=table)
-    _logger.debug(f"primary_keys: {primary_keys}")
+    _logger.debug("primary_keys: %s", primary_keys)
     if not primary_keys:  # pragma: no cover
         raise exceptions.InvalidRedshiftPrimaryKeys()
     equals_clause: str = f"{table}.%s = {temp_table}.%s"
@@ -735,7 +735,7 @@ def _rs_create_table(
         f"{distkey_str}"
         f"{sortkey_str}"
     )
-    _logger.debug(f"Create table query:\n{sql}")
+    _logger.debug("Create table query:\n%s", sql)
     con.execute(sql)
     return table, schema
 
@@ -746,7 +746,7 @@ def _rs_validate_parameters(
     if diststyle not in _RS_DISTSTYLES:
         raise exceptions.InvalidRedshiftDiststyle(f"diststyle must be in {_RS_DISTSTYLES}")
     cols = list(redshift_types.keys())
-    _logger.debug(f"Redshift columns: {cols}")
+    _logger.debug("Redshift columns: %s", cols)
     if (diststyle == "KEY") and (not distkey):
         raise exceptions.InvalidRedshiftDistkey("You must pass a distkey if you intend to use KEY diststyle")
     if distkey and distkey not in cols:
@@ -775,13 +775,13 @@ def _rs_copy(
     sql: str = (
         f"COPY {table_name} FROM '{manifest_path}'\n" f"IAM_ROLE '{iam_role}'\n" "MANIFEST\n" "FORMAT AS PARQUET"
     )
-    _logger.debug(f"copy query:\n{sql}")
+    _logger.debug("copy query:\n%s", sql)
     con.execute(sql)
     sql = "SELECT pg_last_copy_id() AS query_id"
     query_id: int = con.execute(sql).fetchall()[0][0]
     sql = f"SELECT COUNT(DISTINCT filename) as num_files_loaded " f"FROM STL_LOAD_COMMITS WHERE query = {query_id}"
     num_files_loaded: int = con.execute(sql).fetchall()[0][0]
-    _logger.debug(f"{num_files_loaded} files counted. {num_files} expected.")
+    _logger.debug("%s files counted. %s expected.", num_files_loaded, num_files)
     if num_files_loaded != num_files:  # pragma: no cover
         raise exceptions.RedshiftLoadError(
             f"Redshift load rollbacked. {num_files_loaded} files counted. {num_files} expected."
@@ -846,17 +846,17 @@ def write_redshift_copy_manifest(
     payload: str = json.dumps(manifest)
     bucket: str
     bucket, key = _utils.parse_path(manifest_path)
-    _logger.debug(f"payload: {payload}")
+    _logger.debug("payload: %s", payload)
     client_s3: boto3.client = _utils.client(service_name="s3", session=session)
-    _logger.debug(f"bucket: {bucket}")
-    _logger.debug(f"key: {key}")
+    _logger.debug("bucket: %s", bucket)
+    _logger.debug("key: %s", key)
     client_s3.put_object(Body=payload, Bucket=bucket, Key=key)
     return manifest
 
 
 def _rs_drop_table(con: Any, schema: str, table: str) -> None:
     sql = f"DROP TABLE IF EXISTS {schema}.{table}"
-    _logger.debug(f"Drop table query:\n{sql}")
+    _logger.debug("Drop table query:\n%s", sql)
     con.execute(sql)
 
 
@@ -1104,5 +1104,16 @@ def unload_redshift_to_files(
         query_id: int = _con.execute(sql).fetchall()[0][0]
         sql = f"SELECT path FROM STL_UNLOAD_LOG WHERE query={query_id};"
         paths = [x[0].replace(" ", "") for x in _con.execute(sql).fetchall()]
-        _logger.debug(f"paths: {paths}")
+        _logger.debug("paths: %s", paths)
         return paths
+
+
+def _validate_engine(con: sqlalchemy.engine.Engine) -> None:  # pragma: no cover
+    """Raise exceptions.InvalidConnection unless `con` is a SQLAlchemy Engine."""
+    if isinstance(con, sqlalchemy.engine.Engine):
+        return
+    raise exceptions.InvalidConnection(
+        "Invalid 'con' argument, please pass a "
+        "SQLAlchemy Engine. Use wr.db.get_engine(), "
+        "wr.db.get_redshift_temp_engine() or wr.catalog.get_engine()"
+    )
diff --git a/awswrangler/emr.py b/awswrangler/emr.py
index 3801d340e..f3e505b00 100644
--- a/awswrangler/emr.py
+++ b/awswrangler/emr.py
@@ -364,7 +364,7 @@ def _build_cluster_args(**pars):  # pylint: disable=too-many-branches,too-many-s
     if pars["tags"] is not None:
         args["Tags"] = [{"Key": k, "Value": v} for k, v in pars["tags"].items()]
 
-    _logger.info(f"args: \n{json.dumps(args, default=str, indent=4)}")
+    _logger.info("args: \n%s", json.dumps(args, default=str, indent=4))
     return args
 
 
@@ -665,7 +665,7 @@ def create_cluster(  # pylint: disable=too-many-arguments,too-many-locals,unused
     args: Dict[str, Any] = _build_cluster_args(**locals())
     client_emr: boto3.client = _utils.client(service_name="emr", session=boto3_session)
     response: Dict[str, Any] = client_emr.run_job_flow(**args)
-    _logger.debug(f"response: \n{json.dumps(response, default=str, indent=4)}")
+    _logger.debug("response: \n%s", json.dumps(response, default=str, indent=4))
     return response["JobFlowId"]
 
 
@@ -696,7 +696,7 @@ def get_cluster_state(cluster_id: str, boto3_session: Optional[boto3.Session] =
     """
     client_emr: boto3.client = _utils.client(service_name="emr", session=boto3_session)
     response: Dict[str, Any] = client_emr.describe_cluster(ClusterId=cluster_id)
-    _logger.debug(f"response: \n{json.dumps(response, default=str, indent=4)}")
+    _logger.debug("response: \n%s", json.dumps(response, default=str, indent=4))
     return response["Cluster"]["Status"]["State"]
 
 
@@ -723,7 +723,7 @@ def terminate_cluster(cluster_id: str, boto3_session: Optional[boto3.Session] =
     """
     client_emr: boto3.client = _utils.client(service_name="emr", session=boto3_session)
     response: Dict[str, Any] = client_emr.terminate_job_flows(JobFlowIds=[cluster_id])
-    _logger.debug(f"response: \n{json.dumps(response, default=str, indent=4)}")
+    _logger.debug("response: \n%s", json.dumps(response, default=str, indent=4))
 
 
 def submit_steps(
@@ -755,7 +755,7 @@ def submit_steps(
     """
     client_emr: boto3.client = _utils.client(service_name="emr", session=boto3_session)
     response: Dict[str, Any] = client_emr.add_job_flow_steps(JobFlowId=cluster_id, Steps=steps)
-    _logger.debug(f"response: \n{json.dumps(response, default=str, indent=4)}")
+    _logger.debug("response: \n%s", json.dumps(response, default=str, indent=4))
     return response["StepIds"]
 
 
@@ -807,7 +807,7 @@ def submit_step(
     )
     client_emr: boto3.client = _utils.client(service_name="emr", session=session)
     response: Dict[str, Any] = client_emr.add_job_flow_steps(JobFlowId=cluster_id, Steps=[step])
-    _logger.debug(f"response: \n{json.dumps(response, default=str, indent=4)}")
+    _logger.debug("response: \n%s", json.dumps(response, default=str, indent=4))
     return response["StepIds"][0]
 
 
@@ -898,7 +898,7 @@ def get_step_state(cluster_id: str, step_id: str, boto3_session: Optional[boto3.
     """
     client_emr: boto3.client = _utils.client(service_name="emr", session=boto3_session)
     response: Dict[str, Any] = client_emr.describe_step(ClusterId=cluster_id, StepId=step_id)
-    _logger.debug(f"response: \n{json.dumps(response, default=str, indent=4)}")
+    _logger.debug("response: \n%s", json.dumps(response, default=str, indent=4))
     return response["Step"]["Status"]["State"]
 
 
@@ -942,7 +942,7 @@ def submit_ecr_credentials_refresh(
     )
     client_emr: boto3.client = _utils.client(service_name="emr", session=session)
     response: Dict[str, Any] = client_emr.add_job_flow_steps(JobFlowId=cluster_id, Steps=[step])
-    _logger.debug(f"response: \n{json.dumps(response, default=str, indent=4)}")
+    _logger.debug("response: \n%s", json.dumps(response, default=str, indent=4))
     return response["StepIds"][0]
 
 
diff --git a/awswrangler/s3.py b/awswrangler/s3.py
index 2358d2141..770f588a7 100644
--- a/awswrangler/s3.py
+++ b/awswrangler/s3.py
@@ -56,10 +56,10 @@ def get_bucket_region(bucket: str, boto3_session: Optional[boto3.Session] = None
 
     """
     client_s3: boto3.client = _utils.client(service_name="s3", session=boto3_session)
-    _logger.debug(f"bucket: {bucket}")
+    _logger.debug("bucket: %s", bucket)
     region: str = client_s3.get_bucket_location(Bucket=bucket)["LocationConstraint"]
     region = "us-east-1" if region is None else region
-    _logger.debug(f"region: {region}")
+    _logger.debug("region: %s", region)
     return region
 
 
@@ -145,13 +145,15 @@ def list_directories(path: str, boto3_session: Optional[boto3.Session] = None) -
     return _list_objects(path=path, delimiter="/", boto3_session=boto3_session)
 
 
-def list_objects(path: str, boto3_session: Optional[boto3.Session] = None) -> List[str]:
+def list_objects(path: str, suffix: Optional[str] = None, boto3_session: Optional[boto3.Session] = None) -> List[str]:
     """List Amazon S3 objects from a prefix.
 
     Parameters
     ----------
     path : str
         S3 path (e.g. s3://bucket/prefix).
+    suffix : str, optional
+        Suffix for filtering S3 keys.
     boto3_session : boto3.Session(), optional
         Boto3 Session. The default boto3 session will be used if boto3_session receive None.
 
@@ -176,11 +178,14 @@ def list_objects(path: str, boto3_session: Optional[boto3.Session] = None) -> Li
     ['s3://bucket/prefix0', 's3://bucket/prefix1', 's3://bucket/prefix2']
 
     """
-    return _list_objects(path=path, delimiter=None, boto3_session=boto3_session)
+    return _list_objects(path=path, delimiter=None, suffix=suffix, boto3_session=boto3_session)
 
 
 def _list_objects(
-    path: str, delimiter: Optional[str] = None, boto3_session: Optional[boto3.Session] = None
+    path: str,
+    delimiter: Optional[str] = None,
+    suffix: Optional[str] = None,
+    boto3_session: Optional[boto3.Session] = None,
 ) -> List[str]:
     client_s3: boto3.client = _utils.client(service_name="s3", session=boto3_session)
     paginator = client_s3.get_paginator("list_objects_v2")
@@ -192,14 +197,15 @@ def _list_objects(
         args["Delimiter"] = delimiter
     response_iterator = paginator.paginate(**args)
     paths: List[str] = []
-    for page in response_iterator:
+    for page in response_iterator:  # pylint: disable=too-many-nested-blocks
         if delimiter is None:
-            contents: Optional[List[Optional[Dict[str, str]]]] = page.get("Contents")
+            contents: Optional[List] = page.get("Contents")
             if contents is not None:
                 for content in contents:
                     if (content is not None) and ("Key" in content):
                         key: str = content["Key"]
-                        paths.append(f"s3://{bucket}/{key}")
+                        if (suffix is None) or key.endswith(suffix):
+                            paths.append(f"s3://{bucket}/{key}")
         else:
             prefixes: Optional[List[Optional[Dict[str, str]]]] = page.get("CommonPrefixes")
             if prefixes is not None:
@@ -210,11 +216,16 @@ def _list_objects(
     return paths
 
 
-def _path2list(path: Union[str, List[str]], boto3_session: Optional[boto3.Session]) -> List[str]:
+def _path2list(path: object, boto3_session: boto3.Session, suffix: Optional[str] = None) -> List[str]:
+    """Resolve `path` into a list of S3 object paths, optionally keeping only those ending with `suffix`.
+
+    A str is treated as a prefix and listed through list_objects(); a list is taken as explicit object paths.
+    Any other type raises exceptions.InvalidArgumentType.
+    """
     if isinstance(path, str):  # prefix
-        paths: List[str] = list_objects(path=path, boto3_session=boto3_session)
+        paths: List[str] = list_objects(path=path, suffix=suffix, boto3_session=boto3_session)
     elif isinstance(path, list):
-        paths = path
+        paths = path if suffix is None else [x for x in path if x.endswith(suffix)]
     else:
         raise exceptions.InvalidArgumentType(f"{type(path)} is not a valid path type. Please, use str or List[str].")
     return paths
@@ -280,7 +286,7 @@ def _split_paths_by_bucket(paths: List[str]) -> Dict[str, List[str]]:
 
 
 def _delete_objects(bucket: str, keys: List[str], client_s3: boto3.client) -> None:
-    _logger.debug(f"len(keys): {len(keys)}")
+    _logger.debug("len(keys): %s", len(keys))
     batch: List[Dict[str, str]] = [{"Key": key} for key in keys]
     client_s3.delete_objects(Bucket=bucket, Delete={"Objects": batch})
 
@@ -360,7 +366,7 @@ def _describe_object(
             break
         except botocore.exceptions.ClientError as e:  # pragma: no cover
             if e.response["ResponseMetadata"]["HTTPStatusCode"] == 404:  # Not Found
-                _logger.debug(f"Object not found. {i} seconds remaining to wait.")
+                _logger.debug("Object not found. %s seconds remaining to wait.", i)
                 if i == 1:  # Last try, there is no more need to sleep
                     break
                 time.sleep(1)
@@ -674,7 +680,7 @@ def to_csv(  # pylint: disable=too-many-arguments
                     sep=sep,
                 )
             if partitions_values:
-                _logger.debug(f"partitions_values:\n{partitions_values}")
+                _logger.debug("partitions_values:\n%s", partitions_values)
                 catalog.add_csv_partitions(
                     database=database, table=table, partitions_values=partitions_values, boto3_session=session, sep=sep
                 )
@@ -703,7 +709,7 @@ def _to_csv_dataset(
     if (mode == "overwrite") or ((mode == "overwrite_partitions") and (not partition_cols)):
         delete_objects(path=path, use_threads=use_threads, boto3_session=boto3_session)
     df = _data_types.cast_pandas_with_athena_types(df=df, dtype=dtype)
-    _logger.debug(f"dtypes: {df.dtypes}")
+    _logger.debug("dtypes: %s", df.dtypes)
     if not partition_cols:
         file_path: str = f"{path}{uuid.uuid4().hex}.csv"
         _to_text(
@@ -1088,7 +1094,7 @@ def to_parquet(  # pylint: disable=too-many-arguments
                     mode="overwrite",
                 )
             if partitions_values:
-                _logger.debug(f"partitions_values:\n{partitions_values}")
+                _logger.debug("partitions_values:\n%s", partitions_values)
                 catalog.add_parquet_partitions(
                     database=database,
                     table=table,
@@ -1126,7 +1132,7 @@ def _to_parquet_dataset(
     schema: pa.Schema = _data_types.pyarrow_schema_from_pandas(
         df=df, index=index, ignore_cols=partition_cols, dtype=dtype
     )
-    _logger.debug(f"schema: {schema}")
+    _logger.debug("schema: %s", schema)
     if not partition_cols:
         file_path: str = f"{path}{uuid.uuid4().hex}{compression_ext}.parquet"
         _to_parquet_file(
@@ -1174,7 +1180,7 @@ def _to_parquet_file(
             pyarrow_dtype = _data_types.athena2pyarrow(col_type)
             field = pa.field(name=col_name, type=pyarrow_dtype)
             table = table.set_column(col_index, field, table.column(col_name).cast(pyarrow_dtype))
-            _logger.debug(f"Casting column {col_name} ({col_index}) to {col_type} ({pyarrow_dtype})")
+            _logger.debug("Casting column %s (%s) to %s (%s)", col_name, col_index, col_type, pyarrow_dtype)
     pyarrow.parquet.write_table(
         table=table,
         where=path,
@@ -1502,7 +1508,7 @@ def _read_text_chunksize(
 ) -> Iterator[pd.DataFrame]:
     fs: s3fs.S3FileSystem = _utils.get_fs(session=boto3_session, s3_additional_kwargs=s3_additional_kwargs)
     for path in paths:
-        _logger.debug(f"path: {path}")
+        _logger.debug("path: %s", path)
         if pandas_args.get("compression", "infer") == "infer":
             pandas_args["compression"] = infer_compression(path, compression="infer")
         with fs.open(path, "rb") as f:
@@ -1542,7 +1548,7 @@ def _read_parquet_init(
         path_or_paths = path[:-1] if path.endswith("/") else path
     else:
         path_or_paths = path
-    _logger.debug(f"path_or_paths: {path_or_paths}")
+    _logger.debug("path_or_paths: %s", path_or_paths)
     fs: s3fs.S3FileSystem = _utils.get_fs(session=boto3_session, s3_additional_kwargs=s3_additional_kwargs)
     cpus: int = _utils.ensure_cpu_count(use_threads=use_threads)
     data: pyarrow.parquet.ParquetDataset = pyarrow.parquet.ParquetDataset(
@@ -2239,12 +2245,12 @@ def merge_datasets(
     session: boto3.Session = _utils.ensure_session(session=boto3_session)
 
     paths: List[str] = list_objects(path=f"{source_path}/", boto3_session=session)
-    _logger.debug(f"len(paths): {len(paths)}")
+    _logger.debug("len(paths): %s", len(paths))
     if len(paths) < 1:
         return []
 
     if mode == "overwrite":
-        _logger.debug(f"Deleting to overwrite: {target_path}/")
+        _logger.debug("Deleting to overwrite: %s/", target_path)
         delete_objects(path=f"{target_path}/", use_threads=use_threads, boto3_session=session)
     elif mode == "overwrite_partitions":
         paths_wo_prefix: List[str] = [x.replace(f"{source_path}/", "") for x in paths]
@@ -2252,7 +2258,7 @@ def merge_datasets(
         partitions_paths: List[str] = list(set(paths_wo_filename))
         target_partitions_paths = [f"{target_path}/{x}" for x in partitions_paths]
         for path in target_partitions_paths:
-            _logger.debug(f"Deleting to overwrite_partitions: {path}")
+            _logger.debug("Deleting to overwrite_partitions: %s", path)
             delete_objects(path=path, use_threads=use_threads, boto3_session=session)
     elif mode != "append":
         raise exceptions.InvalidArgumentValue(f"{mode} is a invalid mode option.")
@@ -2260,7 +2266,7 @@ def merge_datasets(
     new_objects: List[str] = copy_objects(
         paths=paths, source_path=source_path, target_path=target_path, use_threads=use_threads, boto3_session=session
     )
-    _logger.debug(f"len(new_objects): {len(new_objects)}")
+    _logger.debug("len(new_objects): %s", len(new_objects))
     return new_objects
 
 
@@ -2307,7 +2313,7 @@ def copy_objects(
     ["s3://bucket1/dir1/key0", "s3://bucket1/dir1/key1"]
 
     """
-    _logger.debug(f"len(paths): {len(paths)}")
+    _logger.debug("len(paths): %s", len(paths))
     if len(paths) < 1:
         return []
     source_path = source_path[:-1] if source_path[-1] == "/" else source_path
@@ -2320,13 +2326,13 @@ def copy_objects(
         path_final: str = f"{target_path}/{path_wo_prefix}"
         new_objects.append(path_final)
         batch.append((path, path_final))
-    _logger.debug(f"len(new_objects): {len(new_objects)}")
+    _logger.debug("len(new_objects): %s", len(new_objects))
     _copy_objects(batch=batch, use_threads=use_threads, boto3_session=session)
     return new_objects
 
 
 def _copy_objects(batch: List[Tuple[str, str]], use_threads: bool, boto3_session: boto3.Session) -> None:
-    _logger.debug(f"len(batch): {len(batch)}")
+    _logger.debug("len(batch): %s", len(batch))
     client_s3: boto3.client = _utils.client(service_name="s3", session=boto3_session)
     resource_s3: boto3.resource = _utils.resource(service_name="s3", session=boto3_session)
     for source, target in batch:
diff --git a/awswrangler/torch.py b/awswrangler/torch.py
new file mode 100644
index 000000000..7d3c47316
--- /dev/null
+++ b/awswrangler/torch.py
@@ -0,0 +1,473 @@
+"""PyTorch Module."""
+import io
+import logging
+import os
+import pathlib
+import re
+from collections.abc import Iterable
+from io import BytesIO
+from typing import Any, Callable, Iterator, List, Optional, Tuple, Union
+
+import boto3  # type: ignore
+import numpy as np  # type: ignore
+import sqlalchemy  # type: ignore
+import torch  # type: ignore
+import torchaudio  # type: ignore
+from PIL import Image  # type: ignore
+from torch.utils.data.dataset import Dataset, IterableDataset  # type: ignore
+from torchvision.transforms.functional import to_tensor  # type: ignore
+
+from awswrangler import _utils, db, s3
+
+_logger: logging.Logger = logging.getLogger(__name__)
+
+
+class _BaseS3Dataset:
+    """Shared S3 plumbing for the PyTorch datasets: object listing, download and `.pt` decoding."""
+
+    def __init__(
+        self, path: Union[str, List[str]], suffix: Optional[str] = None, boto3_session: Optional[boto3.Session] = None
+    ):
+        """Resolve the boto3 session and the list of S3 objects backing the dataset.
+
+        Parameters
+        ----------
+        path : Union[str, List[str]]
+            S3 prefix (e.g. s3://bucket/prefix) or list of S3 objects paths (e.g. [s3://bucket/key0, s3://bucket/key1]).
+        suffix: str, optional
+            S3 suffix filtering of object keys (i.e. suffix=".png" -> s3://*.png).
+        boto3_session : boto3.Session(), optional
+            Boto3 Session. The default boto3 session will be used if boto3_session receive None.
+
+        Returns
+        -------
+        torch.utils.data.Dataset
+
+        """
+        super().__init__()
+        self._session = _utils.ensure_session(session=boto3_session)
+        self._paths: List[str] = s3._path2list(  # pylint: disable=protected-access
+            path=path, suffix=suffix, boto3_session=self._session
+        )
+
+    def _fetch_data(self, path: str) -> Any:
+        """Download the object at `path` into a rewound in-memory buffer (TODO: add parquet and csv support)."""
+        bucket, key = _utils.parse_path(path=path)
+        client_s3: boto3.client = _utils.client(service_name="s3", session=self._session)
+        payload = BytesIO()
+        client_s3.download_fileobj(Bucket=bucket, Key=key, Fileobj=payload)
+        payload.seek(0)
+        return payload
+
+    @staticmethod
+    def _load_data(data: io.BytesIO, path: str) -> Any:
+        """Decode a `.pt` payload with torch.load; tar archives and other formats are not implemented."""
+        if path.endswith(".pt"):
+            # NOTE(review): torch.load unpickles the payload - confirm only trusted objects reach this call.
+            return torch.load(data)
+        if path.endswith((".tar.gz", ".tgz")):  # pragma: no cover
+            raise NotImplementedError("Tar loader not implemented!")
+            # tarfile.open(fileobj=data)
+            # tar = tarfile.open(fileobj=data)
+            # for member in tar.getmembers():
+        # Any other extension has no decoder.
+        raise NotImplementedError()  # pragma: no cover
+
+
+class _ListS3Dataset(_BaseS3Dataset, Dataset):
+    """Map-style dataset over the listed S3 objects; subclasses supply `_data_fn` and `_label_fn`."""
+
+    def __getitem__(self, index):
+        """Fetch the object at `index` and return `[data, label]`."""
+        object_path = self._paths[index]
+        return [self._data_fn(self._fetch_data(object_path)), self._label_fn(object_path)]
+
+    def __len__(self):
+        return len(self._paths)
+
+    def _data_fn(self, data) -> Any:  # pragma: no cover
+        raise NotImplementedError()
+
+    def _label_fn(self, path: str) -> Any:  # pragma: no cover
+        raise NotImplementedError()
+
+
+class _S3PartitionedDataset(_ListS3Dataset):
+    """PyTorch Amazon S3 Map-Style Partitioned Dataset (label = integer value of the last `key=value/` folder)."""
+
+    def _label_fn(self, path: str) -> torch.Tensor:
+        label = int(re.findall(r"/([^/=]+)=([^/]*)(?=/)", path)[-1][1])  # lookahead leaves "/" for the next partition
+        return torch.tensor([label])  # pylint: disable=not-callable
+
+    def _data_fn(self, data) -> Any:  # pragma: no cover
+        raise NotImplementedError()
+
+
+# class S3FilesDataset(_BaseS3Dataset, Dataset):
+#     """PyTorch Amazon S3 Files Map-Style Dataset."""
+#
+#     def __init__(
+#         self, path: Union[str, List[str]], suffix: Optional[str] = None, boto3_session: Optional[boto3.Session] = None
+#     ):
+#         """PyTorch S3 Files Map-Style Dataset.
+#
+#         Each file under Amazon S3 path would be handled as a tensor or batch of tensors.
+#
+#         Note
+#         ----
+#         All files will be loaded to memory since random access is needed.
+#
+#         Parameters
+#         ----------
+#         path : Union[str, List[str]]
+#             S3 prefix (e.g. s3://bucket/prefix) or
+#             list of S3 objects paths (e.g. [s3://bucket/key0, s3://bucket/key1]).
+#         boto3_session : boto3.Session(), optional
+#             Boto3 Session. The default boto3 session will be used if boto3_session receive None.
+#
+#         Returns
+#         -------
+#         torch.utils.data.Dataset
+#
+#         """
+#         super(S3FilesDataset, self).__init__(path, suffix, boto3_session)
+#         self._download_files()
+#
+#     def _download_files(self) -> None:
+#         self._data = []
+#         for path in self._paths:
+#             data = self._fetch_data(path)
+#             data = self._load_data(data, path)
+#             self._data.append(data)
+#
+#         self.data = torch.cat(self._data, dim=0)
+#
+#     def __getitem__(self, index):
+#         return self._data[index]
+#
+#     def __len__(self):
+#         return len(self._data)
+
+
+class LambdaS3Dataset(_ListS3Dataset):
+    """Map-style S3 dataset whose decoding and labelling are delegated to user-supplied callables."""
+
+    def __init__(
+        self,
+        path: Union[str, List[str]],
+        data_fn: Callable,
+        label_fn: Callable,
+        suffix: Optional[str] = None,
+        boto3_session: Optional[boto3.Session] = None,
+    ):
+        """PyTorch Amazon S3 Lambda Dataset.
+
+        Parameters
+        ----------
+        path : Union[str, List[str]]
+            S3 prefix (e.g. s3://bucket/prefix) or list of S3 objects paths (e.g. [s3://bucket/key0, s3://bucket/key1]).
+        data_fn: Callable
+            Called with the downloaded object (io.BytesIO); its result is the first element of each item.
+        label_fn: Callable
+            Called with the object path (str); its result is the second element of each item.
+        suffix: str, optional
+            S3 suffix filtering of object keys (i.e. suffix=".png" -> s3://*.png).
+        boto3_session : boto3.Session(), optional
+            Boto3 Session. The default boto3 session will be used if boto3_session receive None.
+
+        Returns
+        -------
+        torch.utils.data.Dataset
+
+        Examples
+        --------
+        >>> import re
+        >>> import torch
+        >>> import awswrangler as wr
+        >>> ds = wr.torch.LambdaS3Dataset(
+        >>>     's3://bucket/path',
+        >>>     data_fn=lambda x: torch.load(x),
+        >>>     label_fn=lambda x: torch.tensor([int(re.findall(r"/class=(.*?)/", x)[-1])]),
+        >>> )
+
+        """
+        super().__init__(path, suffix, boto3_session)
+        self._data_func = data_fn
+        self._label_func = label_fn
+
+    def _data_fn(self, data) -> torch.Tensor:
+        return self._data_func(data)
+
+    def _label_fn(self, path: str) -> torch.Tensor:
+        return self._label_func(path)
+
+
+class AudioS3Dataset(_S3PartitionedDataset):
+    """PyTorch S3 Audio Dataset."""
+
+    def __init__(
+        self,
+        path: Union[str, List[str]],
+        cache_dir: str = "/tmp/",
+        suffix: Optional[str] = None,
+        boto3_session: Optional[boto3.Session] = None,
+    ):
+        """PyTorch Amazon S3 Audio Dataset.
+
+        Read individual WAV audio files stored in Amazon S3 and return
+        them as torch tensors.
+
+        Note
+        ----
+        This dataset assumes audio files are stored with the following structure:
+
+        ::
+
+            bucket
+            ├── class=0
+            │   ├── audio0.wav
+            │   └── audio1.wav
+            └── class=1
+                ├── audio2.wav
+                └── audio3.wav
+
+        Parameters
+        ----------
+        path : Union[str, List[str]]
+            S3 prefix (e.g. s3://bucket/prefix) or list of S3 objects paths (e.g. [s3://bucket/key0, s3://bucket/key1]).
+        cache_dir : str, optional
+            Local directory each object is downloaded to before decoding; the local copy is removed afterwards.
+        suffix: str, optional
+            S3 suffix filtering of object keys (i.e. suffix=".png" -> s3://*.png).
+        boto3_session : boto3.Session(), optional
+            Boto3 Session. The default boto3 session will be used if boto3_session receive None.
+
+        Returns
+        -------
+        torch.utils.data.Dataset
+
+        Examples
+        --------
+        Create an Audio S3 Dataset
+
+        >>> import awswrangler as wr
+        >>> ds = wr.torch.AudioS3Dataset('s3://bucket/path')
+
+        Training a Model
+
+        >>> criterion = CrossEntropyLoss().to(device)
+        >>> opt = SGD(model.parameters(), 0.025)
+        >>> loader = DataLoader(ds, batch_size=batch_size, num_workers=num_workers)
+        >>> for epoch in range(epochs):
+        >>>     correct = 0
+        >>>     model.train()
+        >>>     for i, ((inputs, _), labels) in enumerate(loader):
+        >>>
+        >>>         # Forward Pass
+        >>>         outputs = model(inputs)
+        >>>
+        >>>         # Backward Pass
+        >>>         loss = criterion(outputs, labels)
+        >>>         loss.backward()
+        >>>         opt.step()
+        >>>         opt.zero_grad()
+        >>>
+        >>>         # Accuracy
+        >>>         _, predicted = torch.max(outputs.data, 1)
+        >>>         correct += (predicted == labels).sum().item()
+        >>>         accuracy = 100 * correct / ((i+1) * batch_size)
+        >>>         print(f'batch: {i} loss: {loss.mean().item():.4f} acc: {accuracy:.2f}')
+
+        """
+        super().__init__(path, suffix, boto3_session)
+        self._cache_dir: str = cache_dir[:-1] if cache_dir.endswith("/") else cache_dir
+
+    def _data_fn(self, filename: str) -> Tuple[Any, Any]:  # pylint: disable=arguments-differ
+        try:
+            return torchaudio.load(filename)
+        finally:
+            # Remove the cached copy even when decoding fails, so failed loads do not pile up in cache_dir.
+            os.remove(path=filename)
+
+    def _fetch_data(self, path: str) -> str:
+        bucket, key = _utils.parse_path(path=path)
+        filename: str = f"{self._cache_dir}/{bucket}/{key}"
+        pathlib.Path(filename).parent.mkdir(parents=True, exist_ok=True)
+        client_s3 = _utils.client(service_name="s3", session=self._session)
+        client_s3.download_file(Bucket=bucket, Key=key, Filename=filename)
+        return filename
+
+
+class ImageS3Dataset(_S3PartitionedDataset):
+    """PyTorch Amazon S3 Image Dataset."""
+
+    def __init__(
+        self, path: Union[str, List[str]], suffix: Optional[str] = None, boto3_session: Optional[boto3.Session] = None
+    ):
+        """PyTorch Amazon S3 Image Dataset.
+
+        ImageS3Dataset assumes images are partitioned (within class=<value> folders) in Amazon S3.
+        Each listed object is opened with the Pillow library and converted to a tensor.
+
+        Note
+        ----
+        Assumes Images are stored with the following structure:
+
+        ::
+
+            bucket
+            ├── class=0
+            │   ├── img0.jpeg
+            │   └── img1.jpeg
+            └── class=1
+                ├── img2.jpeg
+                └── img3.jpeg
+
+        Parameters
+        ----------
+        path : Union[str, List[str]]
+            S3 prefix (e.g. s3://bucket/prefix) or list of S3 objects paths (e.g. [s3://bucket/key0, s3://bucket/key1]).
+        suffix: str, optional
+            S3 suffix filtering of object keys (i.e. suffix=".png" -> s3://*.png).
+        boto3_session : boto3.Session(), optional
+            Boto3 Session. The default boto3 session will be used if boto3_session receive None.
+
+        Returns
+        -------
+        torch.utils.data.Dataset
+
+        Examples
+        --------
+        >>> import awswrangler as wr
+        >>> ds = wr.torch.ImageS3Dataset('s3://bucket/path')
+
+        """
+        super().__init__(path, suffix, boto3_session)
+
+    def _data_fn(self, data: io.BytesIO) -> Any:
+        image = Image.open(data)
+        return to_tensor(image)
+
+
+class S3IterableDataset(IterableDataset, _BaseS3Dataset):  # pylint: disable=abstract-method
+    """Iterable-style dataset that streams the contents of `.pt` objects stored in Amazon S3.
+
+    Parameters
+    ----------
+    path : Union[str, List[str]]
+        S3 prefix (e.g. s3://bucket/prefix) or list of S3 objects paths (e.g. [s3://bucket/key0, s3://bucket/key1]).
+    suffix: str, optional
+        S3 suffix filtering of object keys (i.e. suffix=".png" -> s3://*.png).
+    boto3_session : boto3.Session(), optional
+        Boto3 Session. The default boto3 session will be used if boto3_session receive None.
+
+    Returns
+    -------
+    torch.utils.data.Dataset
+
+    Examples
+    --------
+    >>> import awswrangler as wr
+    >>> ds = wr.torch.S3IterableDataset('s3://bucket/path')
+
+    """
+
+    def __iter__(self) -> Union[Iterator[torch.Tensor], Iterator[Tuple[torch.Tensor, torch.Tensor]]]:
+        """Yield the rows of every object: tensor slices, or tuples zipped across a collection of tensors."""
+        for object_path in self._paths:
+            loaded = self._load_data(self._fetch_data(object_path), object_path)
+
+            if isinstance(loaded, torch.Tensor):
+                # A single tensor is iterated along its first dimension.
+                rows: Any = loaded
+            elif isinstance(loaded, Iterable) and all(isinstance(item, torch.Tensor) for item in loaded):
+                # Regroup (data, labels, ...) so each yielded element is one (sample, label, ...) tuple.
+                rows = zip(*loaded)
+            else:  # pragma: no cover
+                raise NotImplementedError(f"ERROR: Type: {type(loaded)} has not been implemented!")
+            yield from rows
+
+
+class SQLDataset(IterableDataset):  # pylint: disable=too-few-public-methods,abstract-method
+    """PyTorch Iterable SQL Dataset."""
+
+    def __init__(
+        self,
+        sql: str,
+        con: sqlalchemy.engine.Engine,
+        label_col: Optional[Union[int, str]] = None,
+        chunksize: Optional[int] = None,
+    ):
+        """PyTorch Iterable SQL Dataset.
+
+        Support for **Redshift**, **PostgreSQL** and **MySQL**.
+
+        Parameters
+        ----------
+        sql : str
+            SQL query whose result rows are converted to tensors.
+        con : sqlalchemy.engine.Engine
+            SQLAlchemy Engine. Use wr.db.get_engine(), wr.db.get_redshift_temp_engine() or wr.catalog.get_engine().
+        label_col : Union[int, str], optional
+            Label column position or name. When given, each item is a (data, label) tuple.
+        chunksize : int, optional
+            The chunksize determines the number of rows to be retrieved from the database at each time.
+
+        Returns
+        -------
+        torch.utils.data.dataset.IterableDataset
+
+        Examples
+        --------
+        >>> import awswrangler as wr
+        >>> con = wr.catalog.get_engine("aws-data-wrangler-postgresql")
+        >>> ds = wr.torch.SQLDataset('select * from public.tutorial', con=con)
+
+        """
+        super().__init__()
+        self._sql = sql
+        self._con = con
+        self._label_col = label_col
+        self._chunksize = chunksize
+
+    def __iter__(self) -> Union[Iterator[torch.Tensor], Iterator[Tuple[torch.Tensor, torch.Tensor]]]:
+        """Iterate over the Dataset, keeping the connection open until every row has been yielded."""
+        if torch.utils.data.get_worker_info() is not None:  # type: ignore
+            raise NotImplementedError()  # pragma: no cover
+        db._validate_engine(con=self._con)  # pylint: disable=protected-access
+        with self._con.connect() as con:
+            cursor: Any = con.execute(self._sql)
+            if (self._label_col is not None) and isinstance(self._label_col, str):
+                label_col: Optional[int] = list(cursor.keys()).index(self._label_col)
+            else:
+                label_col = self._label_col
+            _logger.debug("label_col: %s", label_col)
+            if self._chunksize is None:
+                yield from SQLDataset._records2tensor(records=cursor.fetchall(), label_col=label_col)
+            else:
+                yield from self._iterate_cursor(cursor=cursor, chunksize=self._chunksize, label_col=label_col)
+
+    @staticmethod
+    def _iterate_cursor(
+        cursor: Any, chunksize: int, label_col: Optional[int] = None
+    ) -> Union[Iterator[torch.Tensor], Iterator[Tuple[torch.Tensor, torch.Tensor]]]:
+        while True:
+            records = cursor.fetchmany(chunksize)
+            if not records:
+                break
+            yield from SQLDataset._records2tensor(records=records, label_col=label_col)
+
+    @staticmethod
+    def _records2tensor(
+        records: List[Tuple[Any]], label_col: Optional[int] = None
+    ) -> Union[Iterator[torch.Tensor], Iterator[Tuple[torch.Tensor, torch.Tensor]]]:  # pylint: disable=unused-argument
+        for row in records:
+            if label_col is None:
+                arr_data: np.ndarray = np.array(row, dtype=np.float64)  # np.float alias was removed in NumPy 1.24
+                yield torch.as_tensor(arr_data, dtype=torch.float)  # pylint: disable=no-member
+            else:
+                arr_data = np.array(row[:label_col] + row[label_col + 1 :], dtype=np.float64)  # noqa: E203
+                arr_label: np.ndarray = np.array(row[label_col], dtype=np.int64)
+                ts_data: torch.Tensor = torch.as_tensor(arr_data, dtype=torch.float)  # pylint: disable=no-member
+                ts_label: torch.Tensor = torch.as_tensor(arr_label, dtype=torch.long)  # pylint: disable=no-member
+                yield ts_data, ts_label
diff --git a/building/build-docs.sh b/building/build-docs.sh
index c32c20aa0..8c807b485 100755
--- a/building/build-docs.sh
+++ b/building/build-docs.sh
@@ -4,4 +4,4 @@ set -ex
 pushd ..
 rm -rf docs/build docs/source/stubs
 make -C docs/ html
-doc8 --ignore D005 docs/source
+doc8 --ignore D005,D002 docs/source
diff --git a/docs/source/api.rst b/docs/source/api.rst
index 6b841705e..c92e735da 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -3,6 +3,19 @@
 API Reference
 =============
 
+PyTorch
+-------
+
+.. currentmodule:: awswrangler.torch
+
+.. autosummary::
+    :toctree: stubs
+
+    AudioS3Dataset
+    ImageS3Dataset
+    S3IterableDataset
+    SQLDataset
+
 Amazon S3
 ---------
 
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 99a9b0730..bfdd15c5e 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,5 +1,5 @@
 black~=19.3b0
-pylint~=2.4.4
+pylint~=2.5.0
 flake8~=3.7.9
 mypy~=0.770
 isort~=4.3.21
@@ -11,11 +11,11 @@ pytest-cov~=2.8.1
 pytest-xdist~=1.31.0
 scikit-learn~=0.22.1
 awscli>=1.18.22
-cfn-lint~=0.29.5
-cfn-flip~=1.2.2
+cfn-lint~=0.29.6
+cfn-flip~=1.2.3
 twine~=3.1.1
 wheel~=0.34.2
-sphinx~=3.0.1
+sphinx~=3.0.3
 sphinx_bootstrap_theme~=0.7.1
 moto~=1.3.14
 jupyterlab~=2.1.1
\ No newline at end of file
diff --git a/requirements-torch.txt b/requirements-torch.txt
new file mode 100644
index 000000000..d3e36447e
--- /dev/null
+++ b/requirements-torch.txt
@@ -0,0 +1,4 @@
+torch~=1.5.0
+torchvision~=0.6.0
+torchaudio~=0.5.0
+Pillow~=7.1.2
diff --git a/setup-dev-env.sh b/setup-dev-env.sh
index 692724ee0..c9c2e9902 100755
--- a/setup-dev-env.sh
+++ b/setup-dev-env.sh
@@ -3,5 +3,4 @@ set -ex
 
 pip install --upgrade pip
 pip install -r requirements-dev.txt
-pip install -r requirements.txt
-pip install -e .
+pip install -e ".[torch]"
diff --git a/setup.py b/setup.py
index dbd7baa5d..f9fdc6107 100644
--- a/setup.py
+++ b/setup.py
@@ -24,4 +24,7 @@
     include_package_data=True,
     python_requires=">=3.6, <3.9",
     install_requires=[open("requirements.txt").read().strip().split("\n")],
+    extras_require={
+        "torch": open("requirements-torch.txt").read().strip().split("\n")
+    }
 )
diff --git a/testing/run-validations.sh b/testing/run-validations.sh
index 966038ec9..d32fc7808 100755
--- a/testing/run-validations.sh
+++ b/testing/run-validations.sh
@@ -9,7 +9,7 @@ mv temp.yaml cloudformation.yaml
 pushd ..
 black --line-length 120 --target-version py36 awswrangler testing/test_awswrangler
 isort -rc --line-width 120 awswrangler testing/test_awswrangler
-pydocstyle awswrangler/ --add-ignore=D204
+pydocstyle awswrangler/ --add-ignore=D204,D403
 mypy awswrangler
 flake8 setup.py awswrangler testing/test_awswrangler
 pylint -j 0 awswrangler
diff --git a/testing/test_awswrangler/test_data_lake.py b/testing/test_awswrangler/test_data_lake.py
index a815cd388..94541d8e6 100644
--- a/testing/test_awswrangler/test_data_lake.py
+++ b/testing/test_awswrangler/test_data_lake.py
@@ -708,7 +708,7 @@ def test_parquet_validate_schema(bucket, database):
     df2 = pd.DataFrame({"id2": [1, 2, 3], "val": ["foo", "boo", "bar"]})
     path_file2 = f"s3://{bucket}/test_parquet_file_validate/1.parquet"
     wr.s3.to_parquet(df=df2, path=path_file2)
-    wr.s3.wait_objects_exist(paths=[path_file2])
+    wr.s3.wait_objects_exist(paths=[path_file2], use_threads=False)
     df3 = wr.s3.read_parquet(path=path, validate_schema=False)
     assert len(df3.index) == 6
     assert len(df3.columns) == 3
diff --git a/testing/test_awswrangler/test_torch.py b/testing/test_awswrangler/test_torch.py
new file mode 100644
index 000000000..6e8a3427d
--- /dev/null
+++ b/testing/test_awswrangler/test_torch.py
@@ -0,0 +1,276 @@
+import io
+import logging
+import re
+
+import boto3
+import numpy as np
+import pandas as pd
+import pytest
+import torch
+import torchaudio
+from PIL import Image
+from torch.utils.data import DataLoader
+from torchvision.transforms.functional import to_tensor
+
+import awswrangler as wr
+
+logging.basicConfig(level=logging.INFO, format="[%(asctime)s][%(levelname)s][%(name)s][%(funcName)s] %(message)s")
+logging.getLogger("awswrangler").setLevel(logging.DEBUG)
+logging.getLogger("botocore.credentials").setLevel(logging.CRITICAL)
+
+
+@pytest.fixture(scope="module")
+def cloudformation_outputs():
+    """Yield the outputs of the `aws-data-wrangler-test` CloudFormation stack as an OutputKey -> OutputValue dict."""
+    stacks = boto3.client("cloudformation").describe_stacks(StackName="aws-data-wrangler-test").get("Stacks")
+    # Only the first stack of the response is read.
+    stack_outputs = stacks[0].get("Outputs")
+    yield {item.get("OutputKey"): item.get("OutputValue") for item in stack_outputs}
+
+
+@pytest.fixture(scope="module")
+def bucket(cloudformation_outputs):
+    """Yield the test bucket name taken from the `BucketName` stack output."""
+    if "BucketName" not in cloudformation_outputs:
+        # Without this output the S3 tests have no bucket to write to.
+        raise Exception("You must deploy/update the test infrastructure (CloudFormation)")
+    yield cloudformation_outputs["BucketName"]
+
+
+@pytest.fixture(scope="module")
+def parameters(cloudformation_outputs):
+    """Yield per-engine connection settings assembled from the CloudFormation stack outputs."""
+    # NOTE(review): 3306 is the MySQL default port; confirm the test PostgreSQL instance is really exposed on it.
+    postgresql = dict(host=cloudformation_outputs["PostgresqlAddress"], port=3306, schema="public", database="postgres")
+
+    mysql = dict(host=cloudformation_outputs["MysqlAddress"], port=3306, schema="test", database="test")
+
+    # Redshift additionally takes its port, cluster identifier and IAM role from the stack outputs.
+    redshift = dict(
+        host=cloudformation_outputs["RedshiftAddress"],
+        port=cloudformation_outputs["RedshiftPort"],
+        identifier=cloudformation_outputs["RedshiftIdentifier"],
+        schema="public",
+        database="test",
+        role=cloudformation_outputs["RedshiftRole"],
+    )
+
+    password = cloudformation_outputs["DatabasesPassword"]
+    yield dict(postgresql=postgresql, mysql=mysql, redshift=redshift, password=password, user="test")
+
+
+@pytest.mark.parametrize("chunksize", [None, 1, 10])
+@pytest.mark.parametrize("db_type", ["mysql", "redshift", "postgresql"])
+def test_torch_sql(parameters, db_type, chunksize):
+    """Write a 3x2 float table, read it back through SQLDataset and compare every row tensor."""
+    schema = parameters[db_type]["schema"]
+    table = f"test_torch_sql_{db_type}_{str(chunksize).lower()}"
+    engine = wr.catalog.get_engine(connection=f"aws-data-wrangler-{db_type}")
+    wr.db.to_sql(
+        df=pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]}),
+        con=engine,
+        name=table,
+        schema=schema,
+        if_exists="replace",
+        index=False,
+        index_label=None,
+        chunksize=None,
+        method=None,
+    )
+    rows = list(wr.torch.SQLDataset(f"SELECT * FROM {schema}.{table}", con=engine, chunksize=chunksize))
+    for index, expected in enumerate(([1.0, 4.0], [2.0, 5.0], [3.0, 6.0])):
+        assert torch.all(rows[index].eq(torch.tensor(expected)))
+
+
+@pytest.mark.parametrize("chunksize", [None, 1, 10])
+@pytest.mark.parametrize("db_type", ["mysql", "redshift", "postgresql"])
+@pytest.mark.parametrize("label_col", [2, "c"])
+def test_torch_sql_label(parameters, db_type, chunksize, label_col):
+    """Read a labelled table back through SQLDataset, addressing the label column by position and by name."""
+    schema = parameters[db_type]["schema"]
+    # label_col is part of the table name so the two label_col variants never replace and read
+    # the same table at the same time when the suite runs in parallel (pytest -n).
+    table = f"test_torch_sql_label_{db_type}_{str(chunksize).lower()}_{label_col}"
+    engine = wr.catalog.get_engine(connection=f"aws-data-wrangler-{db_type}")
+    wr.db.to_sql(
+        df=pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0], "c": [7, 8, 9]}),
+        con=engine,
+        name=table,
+        schema=schema,
+        if_exists="replace",
+        index=False,
+        index_label=None,
+        chunksize=None,
+        method=None,
+    )
+    ts = list(
+        wr.torch.SQLDataset(f"SELECT * FROM {schema}.{table}", con=engine, chunksize=chunksize, label_col=label_col)
+    )
+    for index, (features, label) in enumerate((([1.0, 4.0], 7), ([2.0, 5.0], 8), ([3.0, 6.0], 9))):
+        assert torch.all(ts[index][0].eq(torch.tensor(features)))
+        assert torch.all(ts[index][1].eq(torch.tensor([label], dtype=torch.long)))
+
+
+def test_torch_image_s3(bucket):
+    """Upload one PNG under class=0 and check the tensor shape and label ImageS3Dataset returns for it."""
+    folder = "test_torch_image_s3"
+    path = f"s3://{bucket}/{folder}/"
+    wr.s3.delete_objects(path=path, boto3_session=boto3.Session())
+    s3 = boto3.client("s3")
+    ref_label = 0
+    # The context manager closes the local file handle once the bytes have been read.
+    with open("docs/source/_static/logo.png", "rb") as image_file:
+        s3.put_object(
+            Body=image_file.read(), Bucket=bucket, Key=f"{folder}/class={ref_label}/logo.png", ContentType="image/png"
+        )
+    ds = wr.torch.ImageS3Dataset(path=path, suffix="png", boto3_session=boto3.Session())
+    image, label = ds[0]
+    assert image.shape == torch.Size([4, 494, 1636])
+    assert label == torch.tensor(ref_label, dtype=torch.int)
+    wr.s3.delete_objects(path=path)
+
+
+@pytest.mark.parametrize("drop_last", [True, False])
+def test_torch_image_s3_loader(bucket, drop_last):
+    """Upload eight randomly labelled PNGs and check the batches a multi-worker DataLoader builds from them."""
+    folder = f"test_torch_image_s3_loader_{str(drop_last).lower()}"
+    path = f"s3://{bucket}/{folder}/"
+    wr.s3.delete_objects(path=path)
+    client_s3 = boto3.client("s3")
+    labels = np.random.randint(0, 4, size=(8,))
+    with open("./docs/source/_static/logo.png", "rb") as image_file:
+        image_bytes = image_file.read()
+    for i, label in enumerate(labels):
+        client_s3.put_object(
+            Body=image_bytes, Bucket=bucket, Key=f"{folder}/class={label}/logo{i}.png", ContentType="image/png"
+        )
+    ds = wr.torch.ImageS3Dataset(path=path, suffix="png", boto3_session=boto3.Session())
+    batch_size = 2
+    num_train = len(ds)
+    indices = list(range(num_train))
+    loader = DataLoader(
+        ds,
+        batch_size=batch_size,
+        num_workers=4,
+        sampler=torch.utils.data.sampler.RandomSampler(indices),
+        drop_last=drop_last,
+    )
+    for image, label in loader:
+        assert image.shape == torch.Size([batch_size, 4, 494, 1636])
+        assert label.dtype == torch.int64
+    wr.s3.delete_objects(path=path)
+
+
+def test_torch_lambda_s3(bucket):
+    """Upload one PNG and check that LambdaS3Dataset applies the supplied data and label callables."""
+    path = f"s3://{bucket}/test_torch_lambda_s3/"
+    wr.s3.delete_objects(path=path)
+    s3 = boto3.client("s3")
+    ref_label = 0
+    key = f"test_torch_lambda_s3/class={ref_label}/logo.png"
+    # The context manager closes the local file handle once the bytes have been read.
+    with open("./docs/source/_static/logo.png", "rb") as image_file:
+        s3.put_object(Body=image_file.read(), Bucket=bucket, Key=key, ContentType="image/png")
+    # label_fn returns a plain int here, so the label below is compared as an int against a tensor.
+    ds = wr.torch.LambdaS3Dataset(
+        path=path,
+        suffix="png",
+        boto3_session=boto3.Session(),
+        data_fn=lambda x: to_tensor(Image.open(x)),
+        label_fn=lambda x: int(re.findall(r"/class=(.*?)/", x)[-1]),
+    )
+    image, label = ds[0]
+    assert image.shape == torch.Size([4, 494, 1636])
+    assert label == torch.tensor(ref_label, dtype=torch.int)
+    wr.s3.delete_objects(path=path)
+
+
+def test_torch_audio_s3(bucket):
+    """Upload a generated WAV file and check the waveform shape AudioS3Dataset yields through a DataLoader."""
+    size = (1, 8_000 * 5)
+    audio = torch.randint(low=-25, high=25, size=size) / 100.0
+    audio_file = "/tmp/amazing_sound.wav"
+    torchaudio.save(audio_file, audio, 8_000)
+    folder = "test_torch_audio_s3"
+    path = f"s3://{bucket}/{folder}/"
+    wr.s3.delete_objects(path=path)
+    s3 = boto3.client("s3")
+    ref_label = 0
+    key = f"{folder}/class={ref_label}/amazing_sound.wav"
+    with open(audio_file, "rb") as wav_file:
+        s3.put_object(Body=wav_file.read(), Bucket=bucket, Key=key, ContentType="audio/wav")
+    # NOTE(review): this path carries no "s3://" scheme - confirm the path helpers accept it on purpose.
+    s3_audio_file = f"{bucket}/test_torch_audio_s3/class={ref_label}/amazing_sound.wav"
+    ds = wr.torch.AudioS3Dataset(path=s3_audio_file, suffix="wav")
+    loader = DataLoader(ds, batch_size=1)
+    for (audio, rate), label in loader:
+        assert audio.shape == torch.Size((1, *size))
+    wr.s3.delete_objects(path=path)
+
+
+# def test_torch_s3_file_dataset(bucket):
+#     cifar10 = "s3://fast-ai-imageclas/cifar10.tgz"
+#     batch_size = 64
+#     for image, label in DataLoader(
+#         wr.torch.S3FilesDataset(cifar10),
+#         batch_size=batch_size,
+#     ):
+#         assert image.shape == torch.Size([batch_size, 3, 32, 32])
+#         assert label.dtype == torch.int64
+#         break
+
+
+@pytest.mark.parametrize("drop_last", [True, False])
+def test_torch_s3_iterable(bucket, drop_last):
+    """Store three serialized tensor batches and check the shapes S3IterableDataset streams back."""
+    folder = f"test_torch_s3_iterable_{str(drop_last).lower()}"
+    path = f"s3://{bucket}/{folder}/"
+    wr.s3.delete_objects(path=path)
+    batch_size = 32
+    client_s3 = boto3.client("s3")
+    for i in range(3):
+        batch = torch.randn(100, 3, 32, 32)
+        buff = io.BytesIO()
+        torch.save(batch, buff)
+        client_s3.put_object(Body=buff.getvalue(), Bucket=bucket, Key=f"{folder}/file{i}.pt")
+
+    # Without drop_last the final batch may be shorter, so only its first element is checked.
+    dataset = wr.torch.S3IterableDataset(path=f"s3://{bucket}/{folder}/file")
+    for image in DataLoader(dataset, batch_size=batch_size, drop_last=drop_last):
+        if drop_last:
+            assert image.shape == torch.Size([batch_size, 3, 32, 32])
+        else:
+            assert image[0].shape == torch.Size([3, 32, 32])
+
+    wr.s3.delete_objects(path=path)
+
+
+@pytest.mark.parametrize("drop_last", [True, False])
+def test_torch_s3_iterable_with_labels(bucket, drop_last):
+    """Store three serialized (images, labels) tuples and check the pairs S3IterableDataset streams back."""
+    folder = f"test_torch_s3_iterable_with_labels_{str(drop_last).lower()}"
+    path = f"s3://{bucket}/{folder}/"
+    wr.s3.delete_objects(path=path)
+    batch_size = 32
+    client_s3 = boto3.client("s3")
+    for i in range(3):
+        batch = (torch.randn(100, 3, 32, 32), torch.randint(2, size=(100,)))
+        buff = io.BytesIO()
+        torch.save(batch, buff)
+        client_s3.put_object(Body=buff.getvalue(), Bucket=bucket, Key=f"{folder}/file{i}.pt")
+
+    # Without drop_last the final batch may be shorter, so only its first element is checked.
+    dataset = wr.torch.S3IterableDataset(path=f"s3://{bucket}/{folder}/file")
+    for images, labels in DataLoader(dataset, batch_size=batch_size, drop_last=drop_last):
+        if drop_last:
+            assert images.shape == torch.Size([batch_size, 3, 32, 32])
+            assert labels.dtype == torch.int64
+            assert labels.shape == torch.Size([batch_size])
+
+        else:
+            assert images[0].shape == torch.Size([3, 32, 32])
+            assert labels[0].dtype == torch.int64
+            assert labels[0].shape == torch.Size([])
+
+    wr.s3.delete_objects(path=path)
diff --git a/tox.ini b/tox.ini
index 9768fd204..f2bb572c2 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,10 +6,13 @@ deps =
        pytest
        pytest-xdist
        moto
-commands = pytest -n 8 testing/test_awswrangler
+       -rrequirements-torch.txt
+commands =
+       pytest -n 8 testing/test_awswrangler
 
 [testenv:py36]
 deps =
        {[testenv]deps}
        pytest-cov
-commands = pytest --cov=awswrangler -n 8 testing/test_awswrangler
+commands =
+       pytest --cov=awswrangler -n 8 testing/test_awswrangler
diff --git a/tutorials/14 - PyTorch.ipynb b/tutorials/14 - PyTorch.ipynb
new file mode 100644
index 000000000..b7af04627
--- /dev/null
+++ b/tutorials/14 - PyTorch.ipynb	
@@ -0,0 +1,330 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "[![AWS Data Wrangler](_static/logo.png \"AWS Data Wrangler\")](https://github.com/awslabs/aws-data-wrangler)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# PyTorch"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Table of Contents\n",
+    "* [1. Defining Training Function](#1.-Defining-Training-Function)\n",
+    "* [2. Training From Amazon S3](#2.-Training-From-Amazon-S3)\n",
+    "\t* [2.1 Writing PyTorch Dataset to S3](#2.1-Writing-PyTorch-Dataset-to-S3)\n",
+    "\t* [2.2 Training Network](#2.2-Training-Network)\n",
+    "* [3. Training From SQL Query](#3.-Training-From-SQL-Query)\n",
+    "\t* [3.1 Writing Data to SQL Database](#3.1-Writing-Data-to-SQL-Database)\n",
+    "\t* [3.2 Training Network From SQL](#3.2-Training-Network-From-SQL)\n",
+    "* [4. Creating Custom S3 Dataset](#4.-Creating-Custom-S3-Dataset)\n",
+    "\t* [4.1 Creating Custom PyTorch Dataset](#4.1-Creating-Custom-PyTorch-Dataset)\n",
+    "\t* [4.2 Writing Data to S3](#4.2-Writing-Data-to-S3)\n",
+    "\t* [4.3 Training Network](#4.3-Training-Network)\n",
+    "* [5. Delete objects](#5.-Delete-objects)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import io\n",
+    "\n",
+    "import boto3\n",
+    "import torch\n",
+    "import torchvision\n",
+    "import pandas as pd\n",
+    "import awswrangler as wr\n",
+    "\n",
+    "from torch.optim import SGD\n",
+    "from torch.nn import CrossEntropyLoss\n",
+    "from torch.utils.data import DataLoader"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "········\n"
+     ]
+    }
+   ],
+   "source": [
+    "import getpass\n",
+    "bucket = getpass.getpass()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 1. Defining Training Function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def train(model, dataset, batch_size=64, epochs=2, device='cpu', num_workers=1):\n",
+    "    \"\"\"Train model on dataset, printing the running loss and accuracy after every batch.\"\"\"\n",
+    "    model = model.to(device)  # run the model on the requested device\n",
+    "    criterion = CrossEntropyLoss().to(device)\n",
+    "    opt = SGD(model.parameters(), 0.025)\n",
+    "    loader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers)\n",
+    "\n",
+    "    for epoch in range(epochs):\n",
+    "        correct, total = 0, 0  # running counts restart every epoch\n",
+    "        model.train()\n",
+    "        for i, (inputs, labels) in enumerate(loader):\n",
+    "            inputs, labels = inputs.to(device), labels.to(device)  # batches must live on the model's device\n",
+    "            # Forward Pass\n",
+    "            outputs = model(inputs)\n",
+    "            \n",
+    "            # Backward Pass\n",
+    "            loss = criterion(outputs, labels)\n",
+    "            loss.backward()\n",
+    "            opt.step()\n",
+    "            opt.zero_grad()\n",
+    "            \n",
+    "            # Accuracy over the samples actually seen: the last batch may hold fewer than batch_size\n",
+    "            total += labels.size(0)\n",
+    "            _, predicted = torch.max(outputs.data, 1)\n",
+    "            correct += (predicted == labels).sum().item()\n",
+    "            accuracy = 100 * correct / total\n",
+    "            print(f'batch: {i} loss: {loss.mean().item():.4f} acc: {accuracy:.2f}')   "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 2. Training From Amazon S3"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2.1 Writing PyTorch Dataset to S3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "client_s3 = boto3.client(\"s3\")\n",
+    "folder = \"tutorial_torch_dataset\"\n",
+    "\n",
+    "wr.s3.delete_objects(f\"s3://{bucket}/{folder}\")\n",
+    "for i in range(3):\n",
+    "    batch = (\n",
+    "        torch.randn(100, 3, 32, 32),\n",
+    "        torch.randint(2, size=(100,)),\n",
+    "    )\n",
+    "    buff = io.BytesIO()\n",
+    "    torch.save(batch, buff)\n",
+    "    buff.seek(0)\n",
+    "    client_s3.put_object(\n",
+    "        Body=buff.read(),\n",
+    "        Bucket=bucket,\n",
+    "        Key=f\"{folder}/file{i}.pt\",\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2.2 Training Network"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "batch: 0 loss: 7.0132 acc: 0.00\n",
+      "batch: 1 loss: 2.8764 acc: 21.09\n",
+      "batch: 2 loss: 0.9600 acc: 32.29\n",
+      "batch: 3 loss: 0.8676 acc: 36.33\n",
+      "batch: 4 loss: 1.1386 acc: 36.88\n",
+      "batch: 0 loss: 1.0754 acc: 51.56\n",
+      "batch: 1 loss: 1.4241 acc: 51.56\n",
+      "batch: 2 loss: 1.3019 acc: 51.04\n",
+      "batch: 3 loss: 0.8631 acc: 53.52\n",
+      "batch: 4 loss: 0.4252 acc: 54.38\n"
+     ]
+    }
+   ],
+   "source": [
+    "train(\n",
+    "    torchvision.models.resnet18(),\n",
+    "    wr.torch.S3IterableDataset(path=f\"s3://{bucket}/{folder}\")  # full S3 URI, as in the delete_objects cells\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 3. Training From SQL Query"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3.1 Writing Data to SQL Database"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eng = wr.catalog.get_engine(\"aws-data-wrangler-redshift\")\n",
+    "df = pd.DataFrame({\n",
+    "    \"height\": [2, 1.4, 1.7, 1.8, 1.9, 2.2],\n",
+    "    \"weight\": [100.0, 50.0, 70.0, 80.0, 90.0, 160.0],\n",
+    "    \"target\": [1, 0, 0, 1, 1, 1]\n",
+    "})\n",
+    "\n",
+    "wr.db.to_sql(\n",
+    "    df,\n",
+    "    eng,\n",
+    "    schema=\"public\",\n",
+    "    name=\"torch\",\n",
+    "    if_exists=\"replace\",\n",
+    "    index=False\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3.2 Training Network From SQL"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "batch: 0 loss: 8.8708 acc: 50.00\n",
+      "batch: 1 loss: 88.7789 acc: 50.00\n",
+      "batch: 2 loss: 0.8655 acc: 33.33\n",
+      "batch: 0 loss: 0.7036 acc: 50.00\n",
+      "batch: 1 loss: 0.7034 acc: 50.00\n",
+      "batch: 2 loss: 0.8447 acc: 33.33\n",
+      "batch: 0 loss: 0.7012 acc: 50.00\n",
+      "batch: 1 loss: 0.7010 acc: 50.00\n",
+      "batch: 2 loss: 0.8250 acc: 33.33\n",
+      "batch: 0 loss: 0.6992 acc: 50.00\n",
+      "batch: 1 loss: 0.6991 acc: 50.00\n",
+      "batch: 2 loss: 0.8063 acc: 33.33\n",
+      "batch: 0 loss: 0.6975 acc: 50.00\n",
+      "batch: 1 loss: 0.6974 acc: 50.00\n",
+      "batch: 2 loss: 0.7886 acc: 33.33\n"
+     ]
+    }
+   ],
+   "source": [
+    "train(\n",
+    "    torch.nn.Sequential(\n",
+    "        torch.nn.Linear(2, 10),\n",
+    "        torch.nn.ReLU(),\n",
+    "        torch.nn.Linear(10, 2),    \n",
+    "    ),\n",
+    "    wr.torch.SQLDataset(\n",
+    "        sql=\"SELECT * FROM public.torch\",\n",
+    "        con=eng,\n",
+    "        label_col=\"target\",\n",
+    "        chunksize=2\n",
+    "    ),\n",
+    "    num_workers=0,\n",
+    "    batch_size=2,\n",
+    "    epochs=5\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 5. Delete objects"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wr.s3.delete_objects(f\"s3://{bucket}/{folder}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
\ No newline at end of file