aws · igorborgest · Feb 25, 2021 · Feb 25, 2021 · Feb 25, 2021 · Feb 25, 2021
diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -0,0 +1,4 @@
+version: 2
+formats: all
+conda:
+  environment: docs/environment.yml
diff --git a/awswrangler/_config.py b/awswrangler/_config.py
@@ -364,8 +364,8 @@ def _inject_config_doc(doc: Optional[str], available_configs: Tuple[str, ...]) -
     if "\n    Parameters" not in doc:
         return doc
     header: str = (
-        "\n    Note\n    ----"
-        "\n    This functions has arguments that can has default values configured globally through "
+        "\n\n    Note\n    ----"
+        "\n    This function has arguments which can be configured globally through "
         "*wr.config* or environment variables:\n\n"
     )
     args: Tuple[str, ...] = tuple(f"    - {x}\n" for x in available_configs)

diff --git a/awswrangler/athena/_read.py b/awswrangler/athena/_read.py
@@ -590,12 +590,12 @@ def read_sql_query(
 
     **Related tutorial:**
 
-    - `Amazon Athena <https://github.com/awslabs/aws-data-wrangler/blob/
-      main/tutorials/006%20-%20Amazon%20Athena.ipynb>`_
-    - `Athena Cache <https://github.com/awslabs/aws-data-wrangler/blob/
-      main/tutorials/019%20-%20Athena%20Cache.ipynb>`_
-    - `Global Configurations <https://github.com/awslabs/aws-data-wrangler/blob/
-      main/tutorials/021%20-%20Global%20Configurations.ipynb>`_
+    - `Amazon Athena <https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/
+      tutorials/006%20-%20Amazon%20Athena.html>`_
+    - `Athena Cache <https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/
+      tutorials/019%20-%20Athena%20Cache.html>`_
+    - `Global Configurations <https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/
+      tutorials/021%20-%20Global%20Configurations.html>`_
 
     **There are two approaches to be defined through ctas_approach parameter:**
 
@@ -642,8 +642,8 @@ def read_sql_query(
     /athena.html#Athena.Client.get_query_execution>`_ .
 
     For a practical example check out the
-    `related tutorial <https://github.com/awslabs/aws-data-wrangler/blob/
-    main/tutorials/024%20-%20Athena%20Query%20Metadata.ipynb>`_!
+    `related tutorial <https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/
+    tutorials/024%20-%20Athena%20Query%20Metadata.html>`_!
 
 
     Note
@@ -853,12 +853,12 @@ def read_sql_table(
 
     **Related tutorial:**
 
-    - `Amazon Athena <https://github.com/awslabs/aws-data-wrangler/blob/
-      main/tutorials/006%20-%20Amazon%20Athena.ipynb>`_
-    - `Athena Cache <https://github.com/awslabs/aws-data-wrangler/blob/
-      main/tutorials/019%20-%20Athena%20Cache.ipynb>`_
-    - `Global Configurations <https://github.com/awslabs/aws-data-wrangler/blob/
-      main/tutorials/021%20-%20Global%20Configurations.ipynb>`_
+    - `Amazon Athena <https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/
+      tutorials/006%20-%20Amazon%20Athena.html>`_
+    - `Athena Cache <https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/
+      tutorials/019%20-%20Athena%20Cache.html>`_
+    - `Global Configurations <https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/
+      tutorials/021%20-%20Global%20Configurations.html>`_
 
     **There are two approaches to be defined through ctas_approach parameter:**
 
@@ -902,8 +902,8 @@ def read_sql_table(
     /athena.html#Athena.Client.get_query_execution>`_ .
 
     For a practical example check out the
-    `related tutorial <https://github.com/awslabs/aws-data-wrangler/blob/main/
-    tutorials/024%20-%20Athena%20Query%20Metadata.ipynb>`_!
+    `related tutorial <https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/
+    tutorials/024%20-%20Athena%20Query%20Metadata.html>`_!
 
 
     Note

diff --git a/awswrangler/s3/_read_parquet.py b/awswrangler/s3/_read_parquet.py
@@ -684,7 +684,7 @@ def read_parquet_table(
         This function MUST return a bool, True to read the partition or False to ignore it.
         Ignored if `dataset=False`.
         E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False``
-        https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/023%20-%20Flexible%20Partitions%20Filter.ipynb
+        https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/tutorials/023%20-%20Flexible%20Partitions%20Filter.html
     columns : List[str], optional
         Names of columns to read from the file(s).
     validate_schema:

diff --git a/awswrangler/s3/_read_text.py b/awswrangler/s3/_read_text.py
@@ -217,7 +217,7 @@ def read_csv(
         This function MUST return a bool, True to read the partition or False to ignore it.
         Ignored if `dataset=False`.
         E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False``
-        https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/023%20-%20Flexible%20Partitions%20Filter.ipynb
+        https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/tutorials/023%20-%20Flexible%20Partitions%20Filter.html
     pandas_kwargs :
         KEYWORD arguments forwarded to pandas.read_csv(). You can NOT pass `pandas_kwargs` explicit, just add valid
         Pandas arguments in the function call and Wrangler will accept it.
@@ -359,7 +359,7 @@ def read_fwf(
         This function MUST return a bool, True to read the partition or False to ignore it.
         Ignored if `dataset=False`.
         E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False``
-        https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/023%20-%20Flexible%20Partitions%20Filter.ipynb
+        https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/tutorials/023%20-%20Flexible%20Partitions%20Filter.html
     pandas_kwargs:
         KEYWORD arguments forwarded to pandas.read_fwf(). You can NOT pass `pandas_kwargs` explicit, just add valid
         Pandas arguments in the function call and Wrangler will accept it.
@@ -505,7 +505,7 @@ def read_json(
         This function MUST return a bool, True to read the partition or False to ignore it.
         Ignored if `dataset=False`.
         E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False``
-        https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/023%20-%20Flexible%20Partitions%20Filter.ipynb
+        https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/tutorials/023%20-%20Flexible%20Partitions%20Filter.html
     pandas_kwargs:
         KEYWORD arguments forwarded to pandas.read_json(). You can NOT pass `pandas_kwargs` explicit, just add valid
         Pandas arguments in the function call and Wrangler will accept it.

diff --git a/awswrangler/s3/_write_excel.py b/awswrangler/s3/_write_excel.py
@@ -43,7 +43,7 @@ def to_excel(
     s3_additional_kwargs : Optional[Dict[str, Any]]
         Forward to botocore requests. Valid parameters: "ACL", "Metadata", "ServerSideEncryption", "StorageClass",
         "SSECustomerAlgorithm", "SSECustomerKey", "SSEKMSKeyId", "SSEKMSEncryptionContext", "Tagging",
-         "RequestPayer", "ExpectedBucketOwner".
+        "RequestPayer", "ExpectedBucketOwner".
         e.g. s3_additional_kwargs={'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'YOUR_KMS_KEY_ARN'}
     use_threads : bool
         True to enable concurrent requests, False to disable multiple threads.

diff --git a/awswrangler/s3/_write_parquet.py b/awswrangler/s3/_write_parquet.py
@@ -270,7 +270,7 @@ def to_parquet(  # pylint: disable=too-many-arguments,too-many-locals
     s3_additional_kwargs : Optional[Dict[str, Any]]
         Forward to botocore requests. Valid parameters: "ACL", "Metadata", "ServerSideEncryption", "StorageClass",
         "SSECustomerAlgorithm", "SSECustomerKey", "SSEKMSKeyId", "SSEKMSEncryptionContext", "Tagging",
-         "RequestPayer", "ExpectedBucketOwner".
+        "RequestPayer", "ExpectedBucketOwner".
         e.g. s3_additional_kwargs={'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'YOUR_KMS_KEY_ARN'}
     sanitize_columns : bool
         True to sanitize columns names (using `wr.catalog.sanitize_table_name` and `wr.catalog.sanitize_column_name`)
@@ -291,7 +291,7 @@ def to_parquet(  # pylint: disable=too-many-arguments,too-many-locals
     concurrent_partitioning: bool
         If True will increase the parallelism level during the partitions writing. It will decrease the
         writing time and increase the memory usage.
-        https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/022%20-%20Writing%20Partitions%20Concurrently.ipynb
+        https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html
     mode: str, optional
         ``append`` (Default), ``overwrite``, ``overwrite_partitions``. Only takes effect if dataset=True.
         For details check the related tutorial:
@@ -302,7 +302,7 @@ def to_parquet(  # pylint: disable=too-many-arguments,too-many-locals
         If True allows schema evolution (new or missing columns), otherwise a exception will be raised.
         (Only considered if dataset=True and mode in ("append", "overwrite_partitions"))
         Related tutorial:
-        https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/014%20-%20Schema%20Evolution.ipynb
+        https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/tutorials/014%20-%20Schema%20Evolution.html
     database : str, optional
         Glue/Athena catalog: Database name.
     table : str, optional
@@ -740,7 +740,7 @@ def store_parquet_metadata(  # pylint: disable=too-many-arguments
     s3_additional_kwargs : Optional[Dict[str, Any]]
         Forward to botocore requests. Valid parameters: "ACL", "Metadata", "ServerSideEncryption", "StorageClass",
         "SSECustomerAlgorithm", "SSECustomerKey", "SSEKMSKeyId", "SSEKMSEncryptionContext", "Tagging",
-         "RequestPayer", "ExpectedBucketOwner".
+        "RequestPayer", "ExpectedBucketOwner".
         e.g. s3_additional_kwargs={'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'YOUR_KMS_KEY_ARN'}
     boto3_session : boto3.Session(), optional
         Boto3 Session. The default boto3 session will be used if boto3_session receive None.

diff --git a/awswrangler/s3/_write_text.py b/awswrangler/s3/_write_text.py
@@ -153,7 +153,7 @@ def to_csv(  # pylint: disable=too-many-arguments,too-many-locals,too-many-state
     s3_additional_kwargs : Optional[Dict[str, Any]]
         Forward to botocore requests. Valid parameters: "ACL", "Metadata", "ServerSideEncryption", "StorageClass",
         "SSECustomerAlgorithm", "SSECustomerKey", "SSEKMSKeyId", "SSEKMSEncryptionContext", "Tagging",
-         "RequestPayer", "ExpectedBucketOwner".
+        "RequestPayer", "ExpectedBucketOwner".
         e.g. s3_additional_kwargs={'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'YOUR_KMS_KEY_ARN'}
     sanitize_columns : bool
         True to sanitize columns names or False to keep it as is.
@@ -173,7 +173,7 @@ def to_csv(  # pylint: disable=too-many-arguments,too-many-locals,too-many-state
     concurrent_partitioning: bool
         If True will increase the parallelism level during the partitions writing. It will decrease the
         writing time and increase the memory usage.
-        https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/022%20-%20Writing%20Partitions%20Concurrently.ipynb
+        https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html
     mode : str, optional
         ``append`` (Default), ``overwrite``, ``overwrite_partitions``. Only takes effect if dataset=True.
         For details check the related tutorial:
@@ -563,7 +563,7 @@ def to_json(
     s3_additional_kwargs : Optional[Dict[str, Any]]
         Forward to botocore requests. Valid parameters: "ACL", "Metadata", "ServerSideEncryption", "StorageClass",
         "SSECustomerAlgorithm", "SSECustomerKey", "SSEKMSKeyId", "SSEKMSEncryptionContext", "Tagging",
-         "RequestPayer", "ExpectedBucketOwner".
+        "RequestPayer", "ExpectedBucketOwner".
         e.g. s3_additional_kwargs={'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'YOUR_KMS_KEY_ARN'}
     use_threads : bool
         True to enable concurrent requests, False to disable multiple threads.

diff --git a/docs/environment.yml b/docs/environment.yml
@@ -0,0 +1,14 @@
+channels:
+  - conda-forge
+dependencies:
+  - python>=3
+  - pandoc
+  - ipykernel
+  - pip
+  - pip:
+    - nbsphinx
+    - nbsphinx-link
+    - sphinx
+    - sphinx_bootstrap_theme
+    - IPython
+    - -e ..
diff --git a/docs/source/_ext/copy_tutorials.py b/docs/source/_ext/copy_tutorials.py
@@ -0,0 +1,33 @@
+"""Sphinx extension that exposes the repository's tutorial notebooks to the docs build."""
+import json
+from pathlib import Path
+
+
+def setup(app):
+    """Write one ``.nblink`` file per tutorial notebook and return the extension metadata.
+
+    Every ``*.ipynb`` found in the ``tutorials`` directory three levels above this file
+    gets a ``<notebook stem>.nblink`` JSON file in the ``tutorials`` directory next to
+    ``_ext``. The files are (re)written each time the extension is loaded.
+
+    Parameters
+    ----------
+    app
+        The object handed to the extension by Sphinx. It is not used here.
+
+    Returns
+    -------
+    dict
+        Extension metadata declaring the extension safe for parallel reading and writing.
+    """
+    file_dir = Path(__file__).parent
+    link_dir = file_dir.joinpath("../tutorials")
+    # The directory is normally kept in git; create it so a pruned checkout still builds.
+    link_dir.mkdir(parents=True, exist_ok=True)
+    for f in file_dir.joinpath("../../../tutorials").glob("*.ipynb"):
+        nb_link = {"path": f"../../../tutorials/{f.name}", "extra-media": ["../../../tutorials/_static"]}
+        # Fix the encoding so the generated files do not depend on the platform default.
+        with open(link_dir.joinpath(f"{f.stem}.nblink"), "w", encoding="utf-8") as output_file:
+            json.dump(nb_link, output_file)
+    # Nothing is registered on ``app`` and no build state is kept, so parallel builds are safe.
+    return {"parallel_read_safe": True, "parallel_write_safe": True}
diff --git a/docs/source/_static/css/max_width.css b/docs/source/_static/css/max_width.css
@@ -0,0 +1,3 @@
+div.body {
+    max-width: 90%;
+}
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -15,6 +15,8 @@
 
 import sphinx_bootstrap_theme
 
+# Append local Sphinx extensions
+sys.path.append(os.path.abspath("./_ext"))
 # Insert awswrangler"s path into the system.
 sys.path.insert(0, os.path.abspath("../.."))
 import awswrangler
@@ -35,7 +37,14 @@
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named "sphinx.ext.*") or your custom
 # ones.
-extensions = ["sphinx.ext.autosectionlabel", "sphinx.ext.autosummary", "sphinx.ext.napoleon"]
+extensions = [
+    "sphinx.ext.autosectionlabel",
+    "sphinx.ext.autosummary",
+    "sphinx.ext.napoleon",
+    "nbsphinx",
+    "nbsphinx_link",
+    "copy_tutorials",
+]
 
 language = None
 
@@ -83,7 +92,7 @@
     "navbar_links": [
         ("What is Data Wrangler?", "what"),
         ("Install", "install"),
-        ("Tutorials", "https://github.com/awslabs/aws-data-wrangler/tree/main/tutorials", True),
+        ("Tutorials", "tutorials"),
         ("API Reference", "api"),
         ("License", "https://github.com/awslabs/aws-data-wrangler/blob/main/LICENSE.txt", True),
         ("Contributing", "https://github.com/awslabs/aws-data-wrangler/blob/main/CONTRIBUTING.md", True),
@@ -137,3 +146,10 @@
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ["_static"]
+
+nbsphinx_allow_errors = True  # nbsphinx: keep building even if a notebook cell raises
+nbsphinx_execute = "never"  # nbsphinx: render notebooks as stored, never execute them
+
+
+def setup(app):  # hook Sphinx calls with the application object after loading conf.py
+    app.add_css_file("css/max_width.css")  # path is relative to html_static_path ("_static")
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -61,7 +61,7 @@ Read The Docs
 
    what
    install
-   Tutorials <https://github.com/awslabs/aws-data-wrangler/tree/main/tutorials>
+   tutorials
    api
    Community Resources <https://github.com/awslabs/aws-data-wrangler#community-resources>
    Logging <https://github.com/awslabs/aws-data-wrangler#logging>

diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst
@@ -0,0 +1,10 @@
+Tutorials
+=========
+
+.. note:: You can also find all Tutorial Notebooks on `GitHub <https://github.com/awslabs/aws-data-wrangler/tree/main/tutorials>`_.
+
+.. toctree::
+   :maxdepth: 1
+   :glob:
+
+   tutorials/*
diff --git a/docs/source/tutorials/.gitignore b/docs/source/tutorials/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -17,6 +17,9 @@ cfn-flip==1.2.3
 twine==3.3.0
 sphinx==3.5.1
 sphinx_bootstrap_theme==0.7.1
+nbsphinx==0.8.1
+nbsphinx-link==1.3.0
+IPython==7.19.0
 moto==2.0.0
 jupyterlab==3.0.9
 s3fs==0.4.2

diff --git a/requirements-docs.txt b/requirements-docs.txt