diff --git a/.readthedocs.yml b/.readthedocs.yml
new file mode 100644
index 000000000..7030530af
--- /dev/null
+++ b/.readthedocs.yml
@@ -0,0 +1,4 @@
+version: 2
+formats: all
+conda:
+  environment: docs/environment.yml
diff --git a/awswrangler/_config.py b/awswrangler/_config.py
index 25e1f2d59..0fedb21fa 100644
--- a/awswrangler/_config.py
+++ b/awswrangler/_config.py
@@ -364,8 +364,8 @@ def _inject_config_doc(doc: Optional[str], available_configs: Tuple[str, ...]) -> str:
     if "\n    Parameters" not in doc:
         return doc
     header: str = (
-        "\n    Note\n    ----"
-        "\n    This functions has arguments that can has default values configured globally through "
+        "\n\n    Note\n    ----"
+        "\n    This function has arguments which can be configured globally through "
         "*wr.config* or environment variables:\n\n"
     )
     args: Tuple[str, ...] = tuple(f"    - {x}\n" for x in available_configs)
diff --git a/awswrangler/athena/_read.py b/awswrangler/athena/_read.py
index 514de0c2d..aef498274 100644
--- a/awswrangler/athena/_read.py
+++ b/awswrangler/athena/_read.py
@@ -590,12 +590,12 @@ def read_sql_query(
 
     **Related tutorial:**
 
-    - `Amazon Athena <https://github.com/awslabs/aws-data-wrangler/blob/main
-    /tutorials/006%20-%20Amazon%20Athena.ipynb>`_
-    - `Athena Cache <https://github.com/awslabs/aws-data-wrangler/blob/main
-    /tutorials/019%20-%20Athena%20Cache.ipynb>`_
-    - `Global Configurations <https://github.com/awslabs/aws-data-wrangler/blob/main
-    /tutorials/021%20-%20Global%20Configurations.ipynb>`_
+    - `Amazon Athena <https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs
+    /tutorials/006%20-%20Amazon%20Athena.html>`_
+    - `Athena Cache <https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs
+    /tutorials/019%20-%20Athena%20Cache.html>`_
+    - `Global Configurations <https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs
+    /tutorials/021%20-%20Global%20Configurations.html>`_
 
     **There are two approaches to be defined through ctas_approach parameter:**
 
@@ -642,8 +642,8 @@
     /athena.html#Athena.Client.get_query_execution>`_ .
 
     For a practical example check out the
-    `related tutorial <https://github.com/awslabs/aws-data-wrangler/blob/main
-    /tutorials/006%20-%20Amazon%20Athena.ipynb>`_!
+    `related tutorial <https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs
+    /tutorials/006%20-%20Amazon%20Athena.html>`_!
 
 
     Note
@@ -853,12 +853,12 @@ def read_sql_table(
 
     **Related tutorial:**
 
-    - `Amazon Athena <https://github.com/awslabs/aws-data-wrangler/blob/main
-    /tutorials/006%20-%20Amazon%20Athena.ipynb>`_
-    - `Athena Cache <https://github.com/awslabs/aws-data-wrangler/blob/main
-    /tutorials/019%20-%20Athena%20Cache.ipynb>`_
-    - `Global Configurations <https://github.com/awslabs/aws-data-wrangler/blob/main
-    /tutorials/021%20-%20Global%20Configurations.ipynb>`_
+    - `Amazon Athena <https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs
+    /tutorials/006%20-%20Amazon%20Athena.html>`_
+    - `Athena Cache <https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs
+    /tutorials/019%20-%20Athena%20Cache.html>`_
+    - `Global Configurations <https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs
+    /tutorials/021%20-%20Global%20Configurations.html>`_
 
     **There are two approaches to be defined through ctas_approach parameter:**
 
@@ -902,8 +902,8 @@
     /athena.html#Athena.Client.get_query_execution>`_ .
 
     For a practical example check out the
-    `related tutorial <https://github.com/awslabs/aws-data-wrangler/blob/main
-    /tutorials/006%20-%20Amazon%20Athena.ipynb>`_!
+    `related tutorial <https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs
+    /tutorials/006%20-%20Amazon%20Athena.html>`_!
 
 
     Note
diff --git a/awswrangler/s3/_read_parquet.py b/awswrangler/s3/_read_parquet.py
index e6ca2889b..d6f726296 100644
--- a/awswrangler/s3/_read_parquet.py
+++ b/awswrangler/s3/_read_parquet.py
@@ -684,7 +684,7 @@ def read_parquet_table(
         This function MUST return a bool, True to read the partition or False to ignore it.
         Ignored if `dataset=False`.
         E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False``
-        https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/023%20-%20Flexible%20Partitions%20Filter.ipynb
+        https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/tutorials/023%20-%20Flexible%20Partitions%20Filter.html
     columns : List[str], optional
         Names of columns to read from the file(s).
     validate_schema:
diff --git a/awswrangler/s3/_read_text.py b/awswrangler/s3/_read_text.py
index 4821a2b0a..ce58aaa34 100644
--- a/awswrangler/s3/_read_text.py
+++ b/awswrangler/s3/_read_text.py
@@ -217,7 +217,7 @@ def read_csv(
         This function MUST return a bool, True to read the partition or False to ignore it.
         Ignored if `dataset=False`.
         E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False``
-        https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/023%20-%20Flexible%20Partitions%20Filter.ipynb
+        https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/tutorials/023%20-%20Flexible%20Partitions%20Filter.html
     pandas_kwargs :
         KEYWORD arguments forwarded to pandas.read_csv(). You can NOT pass `pandas_kwargs` explicit, just add valid
         Pandas arguments in the function call and Wrangler will accept it.
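Note on the `partition_filter` hunks above and below: the callable receives each partition's keys and values as a `Dict[str, str]`, and the partition is read only when it returns True (and only when `dataset=True`). A minimal sketch of the documented pattern, with a hypothetical bucket and partition layout:

```python
import awswrangler as wr

# Keep only the year=2020/month=1 partitions of a partitioned CSV dataset.
# Each candidate partition arrives as e.g. {"year": "2020", "month": "1"};
# partition_filter is ignored unless dataset=True.
df = wr.s3.read_csv(
    path="s3://my-bucket/my-dataset/",  # hypothetical bucket/prefix
    dataset=True,
    partition_filter=lambda x: x["year"] == "2020" and x["month"] == "1",
)
```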
@@ -359,7 +359,7 @@ def read_fwf(
         This function MUST return a bool, True to read the partition or False to ignore it.
         Ignored if `dataset=False`.
         E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False``
-        https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/023%20-%20Flexible%20Partitions%20Filter.ipynb
+        https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/tutorials/023%20-%20Flexible%20Partitions%20Filter.html
     pandas_kwargs:
         KEYWORD arguments forwarded to pandas.read_fwf(). You can NOT pass `pandas_kwargs` explicit, just add valid
         Pandas arguments in the function call and Wrangler will accept it.
@@ -505,7 +505,7 @@ def read_json(
         This function MUST return a bool, True to read the partition or False to ignore it.
         Ignored if `dataset=False`.
         E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False``
-        https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/023%20-%20Flexible%20Partitions%20Filter.ipynb
+        https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/tutorials/023%20-%20Flexible%20Partitions%20Filter.html
     pandas_kwargs:
         KEYWORD arguments forwarded to pandas.read_json(). You can NOT pass `pandas_kwargs` explicit, just add valid
         Pandas arguments in the function call and Wrangler will accept it.
diff --git a/awswrangler/s3/_write_excel.py b/awswrangler/s3/_write_excel.py
index dbb191414..1556d1f8d 100644
--- a/awswrangler/s3/_write_excel.py
+++ b/awswrangler/s3/_write_excel.py
@@ -43,7 +43,7 @@ def to_excel(
     s3_additional_kwargs : Optional[Dict[str, Any]]
         Forward to botocore requests. Valid parameters: "ACL", "Metadata", "ServerSideEncryption", "StorageClass",
         "SSECustomerAlgorithm", "SSECustomerKey", "SSEKMSKeyId", "SSEKMSEncryptionContext", "Tagging",
-        "RequestPayer", "ExpectedBucketOwner".
+        "RequestPayer", "ExpectedBucketOwner". e.g. s3_additional_kwargs={'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'YOUR_KMS_KEY_ARN'}
     use_threads : bool
         True to enable concurrent requests, False to disable multiple threads.
diff --git a/awswrangler/s3/_write_parquet.py b/awswrangler/s3/_write_parquet.py
index 0ebf7f90c..87293de1f 100644
--- a/awswrangler/s3/_write_parquet.py
+++ b/awswrangler/s3/_write_parquet.py
@@ -270,7 +270,7 @@ def to_parquet(  # pylint: disable=too-many-arguments,too-many-locals
     s3_additional_kwargs : Optional[Dict[str, Any]]
         Forward to botocore requests. Valid parameters: "ACL", "Metadata", "ServerSideEncryption", "StorageClass",
         "SSECustomerAlgorithm", "SSECustomerKey", "SSEKMSKeyId", "SSEKMSEncryptionContext", "Tagging",
-        "RequestPayer", "ExpectedBucketOwner".
+        "RequestPayer", "ExpectedBucketOwner". e.g. s3_additional_kwargs={'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'YOUR_KMS_KEY_ARN'}
     sanitize_columns : bool
         True to sanitize columns names (using `wr.catalog.sanitize_table_name` and `wr.catalog.sanitize_column_name`)
@@ -291,7 +291,7 @@ def to_parquet(  # pylint: disable=too-many-arguments,too-many-locals
     concurrent_partitioning: bool
         If True will increase the parallelism level during the partitions writing. It will decrease the
         writing time and increase the memory usage.
-        https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/022%20-%20Writing%20Partitions%20Concurrently.ipynb
+        https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html
     mode: str, optional
         ``append`` (Default), ``overwrite``, ``overwrite_partitions``. Only takes effect if dataset=True.
         For details check the related tutorial:
@@ -302,7 +302,7 @@ def to_parquet(  # pylint: disable=too-many-arguments,too-many-locals
         If True allows schema evolution (new or missing columns), otherwise a exception will be raised.
         (Only considered if dataset=True and mode in ("append", "overwrite_partitions"))
         Related tutorial:
-        https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/014%20-%20Schema%20Evolution.ipynb
+        https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/tutorials/014%20-%20Schema%20Evolution.html
     database : str, optional
         Glue/Athena catalog: Database name.
     table : str, optional
@@ -740,7 +740,7 @@ def store_parquet_metadata(  # pylint: disable=too-many-arguments
     s3_additional_kwargs : Optional[Dict[str, Any]]
         Forward to botocore requests. Valid parameters: "ACL", "Metadata", "ServerSideEncryption", "StorageClass",
         "SSECustomerAlgorithm", "SSECustomerKey", "SSEKMSKeyId", "SSEKMSEncryptionContext", "Tagging",
-        "RequestPayer", "ExpectedBucketOwner".
+        "RequestPayer", "ExpectedBucketOwner". e.g. s3_additional_kwargs={'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'YOUR_KMS_KEY_ARN'}
     boto3_session : boto3.Session(), optional
         Boto3 Session. The default boto3 session will be used if boto3_session receive None.
diff --git a/awswrangler/s3/_write_text.py b/awswrangler/s3/_write_text.py
index 48fd72f78..dc0c0537e 100644
--- a/awswrangler/s3/_write_text.py
+++ b/awswrangler/s3/_write_text.py
@@ -153,7 +153,7 @@ def to_csv(  # pylint: disable=too-many-arguments,too-many-locals,too-many-state
     s3_additional_kwargs : Optional[Dict[str, Any]]
         Forward to botocore requests. Valid parameters: "ACL", "Metadata", "ServerSideEncryption", "StorageClass",
         "SSECustomerAlgorithm", "SSECustomerKey", "SSEKMSKeyId", "SSEKMSEncryptionContext", "Tagging",
-        "RequestPayer", "ExpectedBucketOwner".
+        "RequestPayer", "ExpectedBucketOwner". e.g. s3_additional_kwargs={'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'YOUR_KMS_KEY_ARN'}
     sanitize_columns : bool
         True to sanitize columns names or False to keep it as is.
@@ -173,7 +173,7 @@ def to_csv(  # pylint: disable=too-many-arguments,too-many-locals,too-many-state
     concurrent_partitioning: bool
         If True will increase the parallelism level during the partitions writing. It will decrease the
         writing time and increase the memory usage.
-        https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/022%20-%20Writing%20Partitions%20Concurrently.ipynb
+        https://aws-data-wrangler.readthedocs.io/en/2.4.0-docs/tutorials/022%20-%20Writing%20Partitions%20Concurrently.html
     mode : str, optional
         ``append`` (Default), ``overwrite``, ``overwrite_partitions``. Only takes effect if dataset=True.
         For details check the related tutorial:
@@ -563,7 +563,7 @@ def to_json(
     s3_additional_kwargs : Optional[Dict[str, Any]]
         Forward to botocore requests. Valid parameters: "ACL", "Metadata", "ServerSideEncryption", "StorageClass",
         "SSECustomerAlgorithm", "SSECustomerKey", "SSEKMSKeyId", "SSEKMSEncryptionContext", "Tagging",
-        "RequestPayer", "ExpectedBucketOwner".
+        "RequestPayer", "ExpectedBucketOwner". e.g. s3_additional_kwargs={'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'YOUR_KMS_KEY_ARN'}
     use_threads : bool
         True to enable concurrent requests, False to disable multiple threads.
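The `e.g.` line added to each writer docstring above corresponds to a call like the following sketch; the bucket, prefix, and KMS key ARN are placeholders:

```python
import pandas as pd
import awswrangler as wr

df = pd.DataFrame({"id": [1, 2], "value": ["foo", "boo"]})

# The extra keys are forwarded to the underlying botocore request,
# here enabling SSE-KMS server-side encryption for the written object.
wr.s3.to_parquet(
    df=df,
    path="s3://my-bucket/prefix/my_file.parquet",  # hypothetical path
    s3_additional_kwargs={
        "ServerSideEncryption": "aws:kms",
        "SSEKMSKeyId": "YOUR_KMS_KEY_ARN",  # substitute a real key ARN
    },
)
```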
diff --git a/docs/environment.yml b/docs/environment.yml
new file mode 100644
index 000000000..8dbc60fdb
--- /dev/null
+++ b/docs/environment.yml
@@ -0,0 +1,14 @@
+channels:
+  - conda-forge
+dependencies:
+  - python>=3
+  - pandoc
+  - ipykernel
+  - pip
+  - pip:
+    - nbsphinx
+    - nbsphinx-link
+    - sphinx
+    - sphinx_bootstrap_theme
+    - IPython
+    - -e ..
diff --git a/docs/source/_ext/copy_tutorials.py b/docs/source/_ext/copy_tutorials.py
new file mode 100644
index 000000000..6b4b5cb8f
--- /dev/null
+++ b/docs/source/_ext/copy_tutorials.py
@@ -0,0 +1,10 @@
+import json
+from pathlib import Path
+
+
+def setup(app):
+    file_dir = Path(__file__).parent
+    for f in file_dir.joinpath("../../../tutorials").glob("*.ipynb"):
+        with open(file_dir.joinpath(f"../tutorials/{f.stem}.nblink"), "w") as output_file:
+            nb_link = {"path": f"../../../tutorials/{f.name}", "extra-media": ["../../../tutorials/_static"]}
+            json.dump(nb_link, output_file)
diff --git a/docs/source/_static/css/max_width.css b/docs/source/_static/css/max_width.css
new file mode 100644
index 000000000..6aace8bd4
--- /dev/null
+++ b/docs/source/_static/css/max_width.css
@@ -0,0 +1,3 @@
+div.body {
+    max-width: 90%;
+}
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 57d4e4395..53e6b93c7 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -15,6 +15,8 @@
 
 import sphinx_bootstrap_theme
 
+# Append local Sphinx extensions
+sys.path.append(os.path.abspath("./_ext"))
 # Insert awswrangler"s path into the system.
 sys.path.insert(0, os.path.abspath("../.."))
 import awswrangler
@@ -35,7 +37,14 @@
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named "sphinx.ext.*") or your custom
 # ones.
-extensions = ["sphinx.ext.autosectionlabel", "sphinx.ext.autosummary", "sphinx.ext.napoleon"]
+extensions = [
+    "sphinx.ext.autosectionlabel",
+    "sphinx.ext.autosummary",
+    "sphinx.ext.napoleon",
+    "nbsphinx",
+    "nbsphinx_link",
+    "copy_tutorials",
+]
 
 language = None
 
@@ -83,7 +92,7 @@
     "navbar_links": [
         ("What is Data Wrangler?", "what"),
         ("Install", "install"),
-        ("Tutorials", "https://github.com/awslabs/aws-data-wrangler/tree/main/tutorials", True),
+        ("Tutorials", "tutorials"),
         ("API Reference", "api"),
         ("License", "https://github.com/awslabs/aws-data-wrangler/blob/main/LICENSE.txt", True),
         ("Contributing", "https://github.com/awslabs/aws-data-wrangler/blob/main/CONTRIBUTING.md", True),
@@ -137,3 +146,10 @@
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ["_static"]
+
+nbsphinx_allow_errors = True
+nbsphinx_execute = "never"
+
+
+def setup(app):
+    app.add_css_file("css/max_width.css")
diff --git a/docs/source/index.rst b/docs/source/index.rst
index e420fd257..3f3bc8ad0 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -61,7 +61,7 @@ Read The Docs
 
    what
    install
-   Tutorials <https://github.com/awslabs/aws-data-wrangler/tree/main/tutorials>
+   tutorials
    api
    Community Resources
    Logging
diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst
new file mode 100644
index 000000000..d934d0719
--- /dev/null
+++ b/docs/source/tutorials.rst
@@ -0,0 +1,10 @@
+Tutorials
+=========
+
+.. note:: You can also find all Tutorial Notebooks on `GitHub <https://github.com/awslabs/aws-data-wrangler/tree/main/tutorials>`_.
+
+.. toctree::
+   :maxdepth: 1
+   :glob:
+
+   tutorials/*
diff --git a/docs/source/tutorials/.gitignore b/docs/source/tutorials/.gitignore
new file mode 100644
index 000000000..d6b7ef32c
--- /dev/null
+++ b/docs/source/tutorials/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 50b5d867a..18b5468a1 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -17,6 +17,9 @@ cfn-flip==1.2.3
 twine==3.3.0
 sphinx==3.5.1
 sphinx_bootstrap_theme==0.7.1
+nbsphinx==0.8.1
+nbsphinx-link==1.3.0
+IPython==7.19.0
 moto==2.0.0
 jupyterlab==3.0.9
 s3fs==0.4.2
diff --git a/requirements-docs.txt b/requirements-docs.txt
deleted file mode 100644
index 60aa0acf0..000000000
--- a/requirements-docs.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-sphinx==3.5.1
-sphinx_bootstrap_theme==0.7.1
--e .
diff --git a/tutorials/003 - Amazon S3.ipynb b/tutorials/003 - Amazon S3.ipynb
index 05e729069..140a5d6ee 100644
--- a/tutorials/003 - Amazon S3.ipynb
+++ b/tutorials/003 - Amazon S3.ipynb
@@ -11,7 +11,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Amazon S3"
+    "# 3 - Amazon S3"
    ]
   },
   {
@@ -102,14 +102,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# 1. CSV files"
+    "## 1. CSV files"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 1.1 Writing CSV files"
+    "### 1.1 Writing CSV files"
    ]
   },
   {
@@ -129,7 +129,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 1.2 Reading single CSV file"
+    "### 1.2 Reading single CSV file"
    ]
   },
   {
@@ -155,14 +155,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 1.3 Reading multiple CSV files"
+    "### 1.3 Reading multiple CSV files"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 1.3.1 Reading CSV by list"
+    "#### 1.3.1 Reading CSV by list"
    ]
   },
   {
@@ -188,7 +188,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 1.3.2 Reading CSV by prefix"
+    "#### 1.3.2 Reading CSV by prefix"
    ]
   },
   {
@@ -214,14 +214,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# 2. JSON files"
+    "## 2. JSON files"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 2.1 Writing JSON files"
+    "### 2.1 Writing JSON files"
    ]
   },
   {
@@ -250,7 +250,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 2.2 Reading single JSON file"
+    "### 2.2 Reading single JSON file"
    ]
   },
   {
@@ -276,14 +276,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 2.3 Reading multiple JSON files"
+    "### 2.3 Reading multiple JSON files"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 2.3.1 Reading JSON by list"
+    "#### 2.3.1 Reading JSON by list"
    ]
   },
   {
@@ -309,7 +309,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 2.3.2 Reading JSON by prefix"
+    "#### 2.3.2 Reading JSON by prefix"
    ]
   },
   {
@@ -335,7 +335,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# 3. Parquet files"
+    "## 3. Parquet files"
    ]
   },
   {
@@ -349,7 +349,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 3.1 Writing Parquet files"
+    "### 3.1 Writing Parquet files"
    ]
   },
   {
@@ -369,7 +369,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 3.2 Reading single Parquet file"
+    "### 3.2 Reading single Parquet file"
    ]
   },
   {
@@ -395,14 +395,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 3.3 Reading multiple Parquet files"
+    "### 3.3 Reading multiple Parquet files"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 3.3.1 Reading Parquet by list"
+    "#### 3.3.1 Reading Parquet by list"
    ]
   },
   {
@@ -428,7 +428,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 3.3.2 Reading Parquet by prefix"
+    "#### 3.3.2 Reading Parquet by prefix"
    ]
   },
   {
@@ -454,7 +454,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# 4. Fixed-width formatted files (only read)"
+    "## 4. Fixed-width formatted files (only read)"
    ]
   },
   {
@@ -487,7 +487,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 4.1 Reading single FWF file"
+    "### 4.1 Reading single FWF file"
    ]
   },
   {
@@ -513,14 +513,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 4.2 Reading multiple FWF files"
+    "### 4.2 Reading multiple FWF files"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 4.2.1 Reading FWF by list"
+    "#### 4.2.1 Reading FWF by list"
    ]
   },
   {
@@ -546,7 +546,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 4.2.2 Reading FWF by prefix"
+    "#### 4.2.2 Reading FWF by prefix"
    ]
   },
   {
@@ -572,14 +572,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# 5. Excel files"
+    "## 5. Excel files"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 5.1 Writing Excel file"
+    "### 5.1 Writing Excel file"
    ]
   },
   {
@@ -606,7 +606,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## 5.2 Reading Excel file"
+    "### 5.2 Reading Excel file"
    ]
   },
   {
@@ -632,7 +632,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# 6. Reading with lastModified filter "
+    "## 6. Reading with lastModified filter"
    ]
   },
   {
@@ -674,7 +674,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 6.2 Define the Date time and specify the Timezone "
+    "### 6.2 Define the Date time and specify the Timezone"
    ]
   },
   {
@@ -863,7 +863,7 @@
    "cell_type": "markdown",
    "metadata": {
     }
    },
    "source": [
-    "## 8.1 Upload object from a file path"
+    "### 8.1 Upload object from a file path"
    ]
   },
   {
@@ -904,7 +904,7 @@
    "cell_type": "markdown",
    "metadata": {
     }
    },
    "source": [
-    "## 8.2 Upload object from a file-like object in binary mode"
+    "### 8.2 Upload object from a file-like object in binary mode"
    ]
   },
   {
@@ -942,7 +942,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# 9. Delete objects"
+    "## 9. Delete objects"
    ]
   },
   {
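For reference, the `copy_tutorials` extension introduced in this diff emits one `.nblink` file per notebook, which `nbsphinx-link` then resolves during the Sphinx build. Reproducing its output for the tutorial renamed above, under the directory layout the extension assumes (the notebook name comes from the diff; the snippet itself is illustrative):

```python
import json

# copy_tutorials.setup() would write this payload to
# docs/source/tutorials/003 - Amazon S3.nblink
nb_link = {
    "path": "../../../tutorials/003 - Amazon S3.ipynb",
    "extra-media": ["../../../tutorials/_static"],
}
print(json.dumps(nb_link))
# {"path": "../../../tutorials/003 - Amazon S3.ipynb", "extra-media": ["../../../tutorials/_static"]}
```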