Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support static_index.html docs #999

Merged
merged 12 commits into from
Jun 10, 2024
74 changes: 53 additions & 21 deletions cosmos/plugin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from flask import abort, url_for
from flask_appbuilder import AppBuilder, expose

from cosmos.settings import dbt_docs_conn_id, dbt_docs_dir
from cosmos.settings import dbt_docs_conn_id, dbt_docs_dir, dbt_docs_index_file_name


def bucket_and_key(path: str) -> Tuple[str, str]:
Expand All @@ -19,65 +19,89 @@
return bucket, key


def open_s3_file(path: str, conn_id: Optional[str]) -> str:
    """Read a file from S3 and return its contents as a string.

    :param path: An ``s3://bucket/key`` URI.
    :param conn_id: Airflow connection id; ``None`` falls back to the
        ``S3Hook`` default connection name.
    :raises FileNotFoundError: if the key does not exist in the bucket.
    """
    from airflow.providers.amazon.aws.hooks.s3 import S3Hook
    from botocore.exceptions import ClientError

    if conn_id is None:
        conn_id = S3Hook.default_conn_name

    hook = S3Hook(aws_conn_id=conn_id)
    bucket, key = bucket_and_key(path)
    try:
        content = hook.read_key(key=key, bucket_name=bucket)
    except ClientError as e:
        # Translate the provider-specific "missing key" error into a plain
        # FileNotFoundError so callers don't need to know boto internals.
        if e.response.get("Error", {}).get("Code", "") == "NoSuchKey":
            raise FileNotFoundError(f"{path} does not exist")
        raise e
    return content  # type: ignore[no-any-return]


def open_gcs_file(path: str, conn_id: Optional[str]) -> str:
    """Read a file from Google Cloud Storage and return its contents as a string.

    :param path: A ``gs://bucket/blob`` URI.
    :param conn_id: Airflow connection id; ``None`` falls back to the
        ``GCSHook`` default connection name.
    :raises FileNotFoundError: if the blob does not exist in the bucket.
    """
    from airflow.providers.google.cloud.hooks.gcs import GCSHook
    from google.cloud.exceptions import NotFound

    if conn_id is None:
        conn_id = GCSHook.default_conn_name

    hook = GCSHook(gcp_conn_id=conn_id)
    bucket, blob = bucket_and_key(path)
    try:
        content = hook.download(bucket_name=bucket, object_name=blob)
    except NotFound:
        # Normalize the GCS-specific error to the base Python exception.
        raise FileNotFoundError(f"{path} does not exist")
    # GCSHook.download returns bytes; decode for a consistent str interface.
    return content.decode("utf-8")  # type: ignore[no-any-return]


def open_azure_file(path: str, conn_id: Optional[str]) -> str:
    """Read a file from Azure Blob Storage (WASB) and return its contents as a string.

    :param path: A ``wasb://container/blob`` URI.
    :param conn_id: Airflow connection id; ``None`` falls back to the
        ``WasbHook`` default connection name.
    :raises FileNotFoundError: if the blob does not exist in the container.
    """
    from airflow.providers.microsoft.azure.hooks.wasb import WasbHook
    from azure.core.exceptions import ResourceNotFoundError

    if conn_id is None:
        conn_id = WasbHook.default_conn_name

    hook = WasbHook(wasb_conn_id=conn_id)

    container, blob = bucket_and_key(path)
    try:
        content = hook.read_file(container_name=container, blob_name=blob)
    except ResourceNotFoundError:
        # Normalize the Azure-specific error to the base Python exception.
        raise FileNotFoundError(f"{path} does not exist")
    return content  # type: ignore[no-any-return]


def open_http_file(path: str, conn_id: Optional[str]) -> str:
    """Fetch a file over HTTP(S) and return the response body as a string.

    :param path: The URL (or endpoint, relative to the connection) to GET.
    :param conn_id: Airflow HTTP connection id; ``None`` means an empty
        connection id, i.e. ``path`` is treated as a full URL.
    :raises FileNotFoundError: if the server responds with a 404.
    """
    from airflow.providers.http.hooks.http import HttpHook
    from requests.exceptions import HTTPError

    if conn_id is None:
        conn_id = ""

    hook = HttpHook(method="GET", http_conn_id=conn_id)
    try:
        res = hook.run(endpoint=path)
        hook.check_response(res)
    except HTTPError as e:
        # check_response raises HTTPError whose message starts with the
        # status code; map 404 to the base Python FileNotFoundError.
        if str(e).startswith("404"):
            raise FileNotFoundError(f"{path} does not exist")
        raise e
    return res.text  # type: ignore[no-any-return]


def open_file(path: str) -> str:
"""Retrieve a file from http, https, gs, s3, or wasb."""
def open_file(path: str, conn_id: Optional[str] = None) -> str:
"""
Retrieve a file from http, https, gs, s3, or wasb.

Raise a (base Python) FileNotFoundError if the file is not found.
"""
if path.strip().startswith("s3://"):
return open_s3_file(conn_id=dbt_docs_conn_id, path=path)
return open_s3_file(path, conn_id=conn_id)
elif path.strip().startswith("gs://"):
return open_gcs_file(conn_id=dbt_docs_conn_id, path=path)
return open_gcs_file(path, conn_id=conn_id)
elif path.strip().startswith("wasb://"):
return open_azure_file(conn_id=dbt_docs_conn_id, path=path)
return open_azure_file(path, conn_id=conn_id)
elif path.strip().startswith("http://") or path.strip().startswith("https://"):
return open_http_file(conn_id=dbt_docs_conn_id, path=path)
return open_http_file(path, conn_id=conn_id)
else:
with open(path) as f:
content = f.read()
Expand Down Expand Up @@ -167,7 +191,7 @@
def dbt_docs_index(self) -> str:
if dbt_docs_dir is None:
abort(404)
html = open_file(op.join(dbt_docs_dir, "index.html"))
html = open_file(op.join(dbt_docs_dir, dbt_docs_index_file_name), conn_id=dbt_docs_conn_id)
# Hack the dbt docs to render properly in an iframe
iframe_resizer_url = url_for(".static", filename="iframeResizer.contentWindow.min.js")
html = html.replace("</head>", f'{iframe_script}<script src="{iframe_resizer_url}"></script></head>', 1)
Expand All @@ -178,16 +202,24 @@
def catalog(self) -> Tuple[str, int, Dict[str, Any]]:
    """Serve dbt's ``catalog.json``.

    Returns 404 when docs hosting is unconfigured (``dbt_docs_dir`` unset)
    or when the artifact is missing from the configured location.
    """
    if dbt_docs_dir is None:
        abort(404)
    try:
        data = open_file(op.join(dbt_docs_dir, "catalog.json"), conn_id=dbt_docs_conn_id)
    except FileNotFoundError:
        # open_file raises FileNotFoundError for missing remote objects too.
        abort(404)
    else:
        return data, 200, {"Content-Type": "application/json"}

@expose("/manifest.json")  # type: ignore[misc]
@has_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_WEBSITE)])
def manifest(self) -> Tuple[str, int, Dict[str, Any]]:
    """Serve dbt's ``manifest.json``.

    Returns 404 when docs hosting is unconfigured (``dbt_docs_dir`` unset)
    or when the artifact is missing from the configured location.
    """
    if dbt_docs_dir is None:
        abort(404)
    try:
        data = open_file(op.join(dbt_docs_dir, "manifest.json"), conn_id=dbt_docs_conn_id)
    except FileNotFoundError:
        # open_file raises FileNotFoundError for missing remote objects too.
        abort(404)
    else:
        return data, 200, {"Content-Type": "application/json"}


dbt_docs_view = DbtDocsView()
Expand Down
1 change: 1 addition & 0 deletions cosmos/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# Whether cosmos log records should also propagate to the root logger.
propagate_logs = conf.getboolean("cosmos", "propagate_logs", fallback=True)
# Location of the generated dbt docs: a local path or a cloud-storage URI
# (s3://, gs://, wasb://, http(s)://). None disables docs hosting.
dbt_docs_dir = conf.get("cosmos", "dbt_docs_dir", fallback=None)
# Airflow connection id used to fetch docs from remote storage; None makes
# each storage hook fall back to its own default connection name.
dbt_docs_conn_id = conf.get("cosmos", "dbt_docs_conn_id", fallback=None)
# Name of the docs index file, e.g. "static_index.html" for docs generated
# with dbt's --static flag. Defaults to the standard "index.html".
dbt_docs_index_file_name = conf.get("cosmos", "dbt_docs_index_file_name", fallback="index.html")

try:
LINEAGE_NAMESPACE = conf.get("openlineage", "namespace")
Expand Down
8 changes: 8 additions & 0 deletions docs/configuration/hosting-docs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ or as an environment variable:

The path can be either a folder in the local file system the webserver is running on, or a URI to a cloud storage platform (S3, GCS, Azure).

If your docs were generated using the ``--static`` flag, you can set the index filename using ``dbt_docs_index_file_name``:

.. code-block:: cfg

[cosmos]
dbt_docs_index_file_name = static_index.html


Host from Cloud Storage
~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ dependencies = [
"importlib-metadata; python_version < '3.8'",
"Jinja2>=3.0.0",
"msgpack",
"packaging",
"pydantic>=1.10.0",
"typing-extensions; python_version < '3.8'",
"virtualenv",
Expand Down Expand Up @@ -127,6 +128,7 @@ dependencies = [
"apache-airflow-providers-cncf-kubernetes>=5.1.1",
"apache-airflow-providers-amazon>=3.0.0,<8.20.0", # https://github.com/apache/airflow/issues/39103
"apache-airflow-providers-docker>=3.5.0",
"apache-airflow-providers-google",
"apache-airflow-providers-microsoft-azure",
"apache-airflow-providers-postgres",
"types-PyYAML",
Expand Down
46 changes: 12 additions & 34 deletions tests/plugin/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from unittest.mock import MagicMock, PropertyMock, mock_open, patch

import pytest
from airflow.configuration import conf
from airflow.utils.db import initdb, resetdb
from airflow.www.app import cached_app
from airflow.www.extensions.init_appbuilder import AirflowAppBuilder
Expand All @@ -34,8 +33,6 @@
open_s3_file,
)

original_conf_get = conf.get


def _get_text_from_response(response) -> str:
# Airflow < 2.4 uses an old version of Werkzeug that does not have Response.text.
Expand Down Expand Up @@ -64,13 +61,6 @@ def app() -> FlaskClient:


def test_dbt_docs(monkeypatch, app):
def conf_get(section, key, *args, **kwargs):
if section == "cosmos" and key == "dbt_docs_dir":
return "path/to/docs/dir"
else:
return original_conf_get(section, key, *args, **kwargs)

monkeypatch.setattr(conf, "get", conf_get)
monkeypatch.setattr("cosmos.plugin.dbt_docs_dir", "path/to/docs/dir")

response = app.get("/cosmos/dbt_docs")
Expand All @@ -90,23 +80,20 @@ def test_dbt_docs_not_set_up(monkeypatch, app):
@patch.object(cosmos.plugin, "open_file")
@pytest.mark.parametrize("artifact", ["dbt_docs_index.html", "manifest.json", "catalog.json"])
def test_dbt_docs_artifact(mock_open_file, monkeypatch, app, artifact):
def conf_get(section, key, *args, **kwargs):
if section == "cosmos" and key == "dbt_docs_dir":
return "path/to/docs/dir"
else:
return original_conf_get(section, key, *args, **kwargs)

monkeypatch.setattr(conf, "get", conf_get)
monkeypatch.setattr("cosmos.plugin.dbt_docs_dir", "path/to/docs/dir")
monkeypatch.setattr("cosmos.plugin.dbt_docs_conn_id", "mock_conn_id")
monkeypatch.setattr("cosmos.plugin.dbt_docs_index_file_name", "custom_index.html")

if artifact == "dbt_docs_index.html":
mock_open_file.return_value = "<head></head><body></body>"
storage_path = "path/to/docs/dir/custom_index.html"
else:
mock_open_file.return_value = "{}"
storage_path = f"path/to/docs/dir/{artifact}"

response = app.get(f"/cosmos/{artifact}")

mock_open_file.assert_called_once()
mock_open_file.assert_called_once_with(storage_path, conn_id="mock_conn_id")
assert response.status_code == 200
if artifact == "dbt_docs_index.html":
assert iframe_script in _get_text_from_response(response)
Expand All @@ -128,21 +115,12 @@ def test_dbt_docs_artifact_missing(app, artifact):
("https://my-bucket/my/path/", "open_http_file"),
],
)
def test_open_file_calls(path, open_file_callback):
    """open_file dispatches to the scheme-specific opener, forwarding conn_id.

    Parametrized (see the decorator above) over one path per supported
    scheme and the name of the opener expected to handle it.
    """
    with patch.object(cosmos.plugin, open_file_callback) as mock_callback:
        mock_callback.return_value = "mock file contents"
        res = open_file(path, conn_id="mock_conn_id")

    # The opener must receive the path positionally and conn_id by keyword.
    mock_callback.assert_called_with(path, conn_id="mock_conn_id")
    assert res == "mock file contents"


Expand All @@ -153,7 +131,7 @@ def test_open_s3_file(conn_id):
mock_hook = mock_module.S3Hook.return_value
mock_hook.read_key.return_value = "mock file contents"

res = open_s3_file(conn_id=conn_id, path="s3://mock-path/to/docs")
res = open_s3_file("s3://mock-path/to/docs", conn_id=conn_id)

if conn_id is not None:
mock_module.S3Hook.assert_called_once_with(aws_conn_id=conn_id)
Expand All @@ -169,7 +147,7 @@ def test_open_gcs_file(conn_id):
mock_hook = mock_module.GCSHook.return_value = MagicMock()
mock_hook.download.return_value = b"mock file contents"

res = open_gcs_file(conn_id=conn_id, path="gs://mock-path/to/docs")
res = open_gcs_file("gs://mock-path/to/docs", conn_id=conn_id)

if conn_id is not None:
mock_module.GCSHook.assert_called_once_with(gcp_conn_id=conn_id)
Expand All @@ -186,7 +164,7 @@ def test_open_azure_file(conn_id):
mock_hook.default_conn_name = PropertyMock(return_value="default_conn")
mock_hook.read_file.return_value = "mock file contents"

res = open_azure_file(conn_id=conn_id, path="wasb://mock-path/to/docs")
res = open_azure_file("wasb://mock-path/to/docs", conn_id=conn_id)

if conn_id is not None:
mock_module.WasbHook.assert_called_once_with(wasb_conn_id=conn_id)
Expand All @@ -205,7 +183,7 @@ def test_open_http_file(conn_id):
mock_hook.check_response.return_value = mock_response
mock_response.text = "mock file contents"

res = open_http_file(conn_id=conn_id, path="http://mock-path/to/docs")
res = open_http_file("http://mock-path/to/docs", conn_id=conn_id)

if conn_id is not None:
mock_module.HttpHook.assert_called_once_with(method="GET", http_conn_id=conn_id)
Expand Down
Loading