Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ instance/

# Sphinx documentation
docs/_build/
docs/_api/
docs/_doctrees/
docs/_inventory_cache/
docs/*/_api/
docs/_doctrees

# PyBuilder
target/
Expand Down
5 changes: 4 additions & 1 deletion docs/build_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
DocBuildError,
display_errors_summary,
)
from docs.exts.docs_build.fetch_inventories import fetch_inventories
from docs.exts.docs_build.github_action_utils import with_group # pylint: disable=no-name-in-module
from docs.exts.docs_build.spelling_checks import ( # pylint: disable=no-name-in-module
SpellingError,
Expand Down Expand Up @@ -83,7 +84,7 @@ def _get_parser():
'--for-production',
dest='for_production',
action='store_true',
help=('Builds documentation for official release i.e. all links point to stable version'),
help='Builds documentation for official release i.e. all links point to stable version',
)

return parser
Expand Down Expand Up @@ -173,6 +174,8 @@ def main():
with with_group(f"Documentation will be built for {len(current_packages)} package(s)"):
for pkg in current_packages:
print(f" - {pkg}")
with with_group("Fetching inventories"):
fetch_inventories()

all_build_errors: Dict[Optional[str], List[DocBuildError]] = {}
all_spelling_errors: Dict[Optional[str], List[SpellingError]] = {}
Expand Down
45 changes: 11 additions & 34 deletions docs/exts/airflow_intersphinx.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
ROOT_DIR = os.path.abspath(os.path.join(CURRENT_DIR, os.pardir, os.pardir))
DOCS_DIR = os.path.join(ROOT_DIR, 'docs')
DOCS_PROVIDER_DIR = os.path.join(ROOT_DIR, 'docs')
S3_DOC_URL = "http://apache-airflow-docs.s3-website.eu-central-1.amazonaws.com"


def _create_init_py(app, config):
Expand All @@ -51,54 +50,32 @@ def _generate_provider_intersphinx_mapping():
continue

provider_base_url = f'/docs/{package_name}/{current_version}/'
doc_inventory = f'{DOCS_DIR}/_build/docs/{package_name}/{current_version}/objects.inv'
cache_inventory = f'{DOCS_DIR}/_inventory_cache/{package_name}/objects.inv'

airflow_mapping[package_name] = (
# base URI
provider_base_url,
# Index locations list
# If passed None, this will try to fetch the index from `[base_url]/objects.inv`
# If we pass a path containing `://` then we will try to index from the given address.
# Otherwise, it will try to read the local file
#
# In this case, the local index will be read. If unsuccessful, the remote index
# will be fetched.
(
f'{DOCS_DIR}/_build/docs/{package_name}/{current_version}/objects.inv',
f'{S3_DOC_URL}/docs/{package_name}/latest/objects.inv',
),
(doc_inventory if os.path.exists(doc_inventory) else cache_inventory,),
)
if os.environ.get('AIRFLOW_PACKAGE_NAME') != 'apache-airflow':
doc_inventory = f'{DOCS_DIR}/_build/docs/apache-airflow/{current_version}/objects.inv'
cache_inventory = f'{DOCS_DIR}/_inventory_cache/apache-airflow/objects.inv'

airflow_mapping['apache-airflow'] = (
# base URI
f'/docs/apache-airflow/{current_version}/',
# Index locations list
# If passed None, this will try to fetch the index from `[base_url]/objects.inv`
# If we pass a path containing `://` then we will try to index from the given address.
# Otherwise, it will try to read the local file
#
# In this case, the local index will be read. If unsuccessful, the remote index
# will be fetched.
(
f'{DOCS_DIR}/_build/docs/apache-airflow/{current_version}/objects.inv',
f'{S3_DOC_URL}/docs/apache-airflow/latest/objects.inv',
),
(doc_inventory if os.path.exists(doc_inventory) else cache_inventory,),
)

if os.environ.get('AIRFLOW_PACKAGE_NAME') != 'apache-airflow-providers':
doc_inventory = f'{DOCS_DIR}/_build/docs/apache-airflow-providers/objects.inv'
cache_inventory = f'{DOCS_DIR}/_inventory_cache/apache-airflow-providers/objects.inv'

airflow_mapping['apache-airflow-providers'] = (
# base URI
'/docs/apache-airflow-providers/',
# Index locations list
# If passed None, this will try to fetch the index from `[base_url]/objects.inv`
# If we pass a path containing `://` then we will try to index from the given address.
# Otherwise, it will try to read the local file
#
# In this case, the local index will be read. If unsuccessful, the remote index
# will be fetched.
(
f'{DOCS_DIR}/_build/docs/apache-airflow-providers/objects.inv',
f'{S3_DOC_URL}/docs/apache-airflow-providers/objects.inv',
),
(doc_inventory if os.path.exists(doc_inventory) else cache_inventory,),
)

return airflow_mapping
Expand Down
7 changes: 6 additions & 1 deletion docs/exts/docs_build/docs_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,12 @@ def publish(self):
print()


def get_available_providers_packages():
    """Return the ``package-name`` of every entry in ``ALL_PROVIDER_YAMLS``, in order."""
    package_names = []
    for provider_yaml in ALL_PROVIDER_YAMLS:
        package_names.append(provider_yaml['package-name'])
    return package_names


def get_available_packages():
    """Get list of all available packages to build.

    The list starts with ``apache-airflow``, continues with every provider
    package reported by ``get_available_providers_packages()`` and ends with
    the ``apache-airflow-providers`` package.
    """
    # The provider names come from the shared helper only; the duplicated
    # inline comprehension that was immediately overwritten has been dropped.
    provider_package_names = get_available_providers_packages()
    return ["apache-airflow", *provider_package_names, "apache-airflow-providers"]
91 changes: 91 additions & 0 deletions docs/exts/docs_build/fetch_inventories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import concurrent
import concurrent.futures
import datetime
import os
import shutil

import requests
from requests.adapters import DEFAULT_POOLSIZE

from docs.exts.docs_build.docs_builder import get_available_providers_packages

# Directory containing this module.
CURRENT_DIR = os.path.dirname(__file__)
# Three levels above this module's directory.
ROOT_DIR = os.path.abspath(os.path.join(CURRENT_DIR, *([os.pardir] * 3)))
DOCS_DIR = os.path.join(ROOT_DIR, 'docs')
# Downloaded inventories are stored under this directory, one sub-directory per package.
CACHE_DIR = os.path.join(DOCS_DIR, '_inventory_cache')
EXPIRATION_DATE_PATH = os.path.join(CACHE_DIR, "expiration-date")

# Base address of the published documentation and the URL templates used to
# locate each package's ``objects.inv`` (filled in with ``str.format``).
S3_DOC_URL = "http://apache-airflow-docs.s3-website.eu-central-1.amazonaws.com"
S3_DOC_URL_VERSIONED = S3_DOC_URL + "/docs/{package_name}/latest/objects.inv"
S3_DOC_URL_NON_VERSIONED = S3_DOC_URL + "/docs/{package_name}/objects.inv"


def _fetch_file(session: requests.Session, url: str, path: str):
    """Download ``url`` using ``session`` and write the response body to ``path``.

    When the server answers with a non-successful status, a message is printed
    and nothing is written. Missing parent directories of ``path`` are created.

    :param session: requests session used to perform the GET request
    :param url: address of the inventory to download
    :param path: destination file; it is overwritten when it already exists
    """
    # With ``stream=True`` the body is not read up front, so the connection is
    # only handed back to the pool once the body is consumed or the response is
    # closed. The ``with`` block closes it on every exit path, including the
    # early return below where the body is never read.
    with session.get(url, allow_redirects=True, stream=True) as response:
        if not response.ok:
            print(f"Failed to fetch inventory: {url}")
            return

        os.makedirs(os.path.dirname(path), exist_ok=True)
        # Have the raw stream undo any transfer content-encoding while copying.
        response.raw.decode_content = True
        with open(path, 'wb') as f:
            shutil.copyfileobj(response.raw, f)
    print(f"Fetched inventory: {url}")


def _is_outdated(path: str) -> bool:
    """Tell whether the cached file at ``path`` has to be downloaded (again).

    :param path: location of the cached file
    :return: ``True`` when ``path`` is not an existing file or when it was last
        modified 12 hours ago or earlier; ``False`` otherwise
    """
    if not os.path.isfile(path):
        return True
    age = datetime.datetime.now() - datetime.datetime.fromtimestamp(os.path.getmtime(path))
    return age >= datetime.timedelta(hours=12)


def fetch_inventories():
    """Fetch all inventories for Airflow documentation packages and store in cache.

    One ``objects.inv`` is expected per provider package, plus one for
    ``apache-airflow`` and one for ``apache-airflow-providers``. Only files
    that are missing from the cache or outdated (see ``_is_outdated``) are
    downloaded; the downloads run concurrently on a thread pool.
    """
    os.makedirs(CACHE_DIR, exist_ok=True)

    to_download = [
        (
            S3_DOC_URL_VERSIONED.format(package_name=pkg_name),
            f'{CACHE_DIR}/{pkg_name}/objects.inv',
        )
        for pkg_name in get_available_providers_packages()
    ]
    to_download.append(
        (
            S3_DOC_URL_VERSIONED.format(package_name='apache-airflow'),
            f'{CACHE_DIR}/apache-airflow/objects.inv',
        )
    )
    to_download.append(
        (
            S3_DOC_URL_NON_VERSIONED.format(package_name='apache-airflow-providers'),
            f'{CACHE_DIR}/apache-airflow-providers/objects.inv',
        )
    )
    to_download = [(url, path) for url, path in to_download if _is_outdated(path)]
    print(f"To download {len(to_download)} inventorie(s)")
    if not to_download:
        return

    with requests.Session() as session, concurrent.futures.ThreadPoolExecutor(DEFAULT_POOLSIZE) as pool:
        future_to_url = {
            pool.submit(_fetch_file, session=session, url=url, path=path): url for url, path in to_download
        }
        # An exception raised inside a worker is stored on its future and would
        # otherwise vanish unnoticed; report it but keep the build going, since
        # fetching inventories is a best-effort step.
        for future in concurrent.futures.as_completed(future_to_url):
            error = future.exception()
            if error is not None:
                print(f"Failed to fetch inventory: {future_to_url[future]} ({error})")