[MAINTENANCE] Lint Docs #8936

Merged · 31 commits · Mar 29, 2024
Changes from 15 commits

Commits
6918f64
lint docs dir
Kilo59 Nov 3, 2023
e8770d6
autofix
Kilo59 Nov 3, 2023
25a369e
docs `ruff.toml`
Kilo59 Nov 3, 2023
9c8c512
fix or ignore violations
Kilo59 Nov 3, 2023
43bb5b8
docs_link_checker fix
Kilo59 Nov 3, 2023
21f5e21
Merge branch 'develop' into m/_/docs-lint
Kilo59 Nov 6, 2023
9d3cd2d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 6, 2023
ca6db63
use `pathlib` with docs_link_checker
Kilo59 Nov 6, 2023
8a6253f
`prepare_prior_versions.py` use pathlib
Kilo59 Nov 7, 2023
8506264
sphinx conf
Kilo59 Nov 7, 2023
465375f
use pathlib.Path.rglob()
Kilo59 Nov 7, 2023
dfcb42b
fix types
Kilo59 Nov 7, 2023
470dda2
resolve
Kilo59 Nov 7, 2023
9e4b249
elif
Kilo59 Nov 7, 2023
1985143
fix `Path`/`str` issue
Kilo59 Nov 7, 2023
a91d95c
resolve merge conflicts
Kilo59 Mar 29, 2024
d054aa8
set line length to `88` to reduce formatting changes
Kilo59 Mar 29, 2024
4ff83c6
update path
Kilo59 Mar 29, 2024
7a2a5be
ignore line-too-long
Kilo59 Mar 29, 2024
04468b7
ignore banned-import
Kilo59 Mar 29, 2024
dc54578
use exclude instead of include
Kilo59 Mar 29, 2024
575c541
update excludes
Kilo59 Mar 29, 2024
573d8dd
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 29, 2024
1157416
Merge branch 'develop' into m/_/docs-lint
Kilo59 Mar 29, 2024
1ad8df4
lin_checker
Kilo59 Mar 29, 2024
3800097
noqas
Kilo59 Mar 29, 2024
11e3e29
limited type-checking
Kilo59 Mar 29, 2024
0a62984
add doc types
Kilo59 Mar 29, 2024
0320147
fix type issues
Kilo59 Mar 29, 2024
e22217f
don't check type-files
Kilo59 Mar 29, 2024
124d991
docstring_parser type warnings
Kilo59 Mar 29, 2024
97 changes: 54 additions & 43 deletions docs/checks/docs_link_checker.py
@@ -12,10 +12,10 @@
"""
from __future__ import annotations

import glob
import logging
import os
import pathlib
import re
import sys
from typing import List, Optional

import click
@@ -35,7 +35,7 @@ class LinkReport:
message: A message describing the failure.
"""

def __init__(self, link: str, file: str, message: str):
def __init__(self, link: str, file: pathlib.Path, message: str):
self.link = link
self.file = file
self.message = message
@@ -49,8 +49,8 @@ class LinkChecker:

def __init__(
self,
docs_path: str,
docs_root: str,
docs_path: pathlib.Path,
docs_root: pathlib.Path,
site_prefix: str,
skip_external: bool = False,
):
@@ -62,8 +62,8 @@ def __init__(
site_prefix: The top-level folder (ex: /docs) used to resolve absolute links to local files
skip_external: Whether or not to skip checking external (http..) links
"""
self._docs_path = docs_path.strip(os.path.sep)
self._docs_root = docs_root.strip(os.path.sep)
self._docs_path = docs_path
self._docs_root = docs_root
self._site_prefix = site_prefix.strip("/")
self._skip_external = skip_external

@@ -111,7 +111,9 @@ def _is_doc_link(self, markdown_link: str) -> bool:
def _is_anchor_link(self, link: str) -> bool:
return link.startswith("#")

def _check_external_link(self, link: str, file: str) -> Optional[LinkReport]:
def _check_external_link(
self, link: str, file: pathlib.Path
) -> Optional[LinkReport]:
if self._skip_external:
return None

@@ -142,23 +144,24 @@ def _check_external_link(self, link: str, file: str) -> Optional[LinkReport]:
link, file, f"External link raised a connection error {err.errno}"
)

def _get_os_path(self, path: str) -> str:
"""Gets an os-specific path from a path found in a markdown file"""
return path.replace("/", os.path.sep)
Comment on lines -155 to -157 (Member Author): Don't need this method at all if using pathlib. (A short pathlib sketch follows this file's diff.)

def _get_absolute_path(self, path: str) -> str:
return os.path.join(self._docs_root, self._get_os_path(path)) # noqa: PTH118
def _get_absolute_path(self, path: pathlib.Path) -> pathlib.Path:
return self._docs_root.joinpath(path).resolve()

def _get_relative_path(self, file: str, path: str) -> str:
def _get_relative_path(
self, file: pathlib.Path, path: pathlib.Path
) -> pathlib.Path:
# link should be relative to the location of the current file
directory = os.path.dirname(file) # noqa: PTH120
return os.path.join(directory, self._get_os_path(path)) # noqa: PTH118
return file.parent / path

def _get_docroot_path(self, path: str) -> str:
return os.path.join(self._docs_root, self._get_os_path(path)) # noqa: PTH118
def _get_docroot_path(self, path: pathlib.Path) -> pathlib.Path:
return self._docs_path / path

def _check_absolute_link(
self, link: str, file: str, path: str, version: Optional[str]
self,
link: str,
file: pathlib.Path,
path: pathlib.Path | str,
version: Optional[str],
) -> Optional[LinkReport]:
logger.debug(f"Checking absolute link {link} in file {file}")

@@ -167,71 +170,73 @@ def _check_absolute_link(
return None

# absolute links should point to files that exist (with the .md extension added)
md_file = self._get_absolute_path(path).rstrip("/") + ".md"
md_file = pathlib.Path(path).resolve().with_suffix(".md")
logger.debug(f"Absolute link {link} resolved to path {md_file}")

if not os.path.isfile(md_file): # noqa: PTH113
if not md_file.is_file():
logger.info(f"Absolute link {link} in file {file} was not found")
return LinkReport(link, file, f"Linked file {md_file} not found")
else:
logger.debug(f"Absolute link {link} in file {file} found")
return None

def _check_absolute_image(
self, link: str, file: str, path: str
self, link: str, file: pathlib.Path, path: pathlib.Path
) -> Optional[LinkReport]:
logger.debug(f"Checking absolute image {link} in file {file}")

image_file = self._get_absolute_path(path)
if not os.path.isfile(image_file): # noqa: PTH113
if not image_file.is_file():
logger.info(f"Absolute image {link} in file {file} was not found")
return LinkReport(link, file, f"Image {image_file} not found")
else:
logger.debug(f"Absolute image {link} in file {file} found")
return None

def _check_relative_link(
self, link: str, file: str, path: str
self, link: str, file: pathlib.Path, path: pathlib.Path
) -> Optional[LinkReport]:
logger.debug(f"Checking relative link {link} in file {file}")

md_file = self._get_relative_path(file, path)
logger.debug(f"Relative link {link} resolved to path {md_file}")

if not os.path.isfile(md_file): # noqa: PTH113
if not md_file.is_file():
logger.info(f"Relative link {link} in file {file} was not found")
return LinkReport(link, file, f"Linked file {md_file} not found")
else:
logger.debug(f"Relative link {link} in file{file} found")
return None

def _check_relative_image(
self, link: str, file: str, path: str
self, link: str, file: pathlib.Path, path: pathlib.Path
) -> Optional[LinkReport]:
logger.debug(f"Checking relative image {link} in file {file}")

image_file = self._get_relative_path(file, path)
if not os.path.isfile(image_file): # noqa: PTH113
if not image_file.is_file():
logger.info(f"Relative image {link} in file {file} was not found")
return LinkReport(link, file, f"Image {image_file} not found")
else:
logger.debug(f"Relative image {link} in file {file} found")
return None

def _check_docroot_link(
self, link: str, file: str, path: str
self, link: str, file: pathlib.Path, path: pathlib.Path
) -> Optional[LinkReport]:
logger.debug(f"Checking docroot link {link} in file {file}")

md_file = self._get_docroot_path(path)
if not os.path.isfile(md_file): # noqa: PTH113
if not md_file.is_file():
logger.info(f"Docroot link {link} in file {file} was not found")
return LinkReport(link, file, f"Linked file {md_file} not found")
else:
logger.debug(f"Docroot link {link} in file {file} found")
return None

def _check_link(self, match: re.Match, file: str) -> Optional[LinkReport]:
def _check_link( # noqa: PLR0912 # too complex
self, match: re.Match, file: pathlib.Path
) -> Optional[LinkReport]:
"""Checks that a link is valid. Valid links are:
- Absolute links that begin with a forward slash and the specified site prefix (ex: /docs) with no suffix
- Absolute images with an image suffix
@@ -285,7 +290,7 @@ def _check_link(self, match: re.Match, file: str) -> Optional[LinkReport]:

return result

def check_file(self, file: str) -> List[LinkReport]:
def check_file(self, file: pathlib.Path) -> List[LinkReport]:
"""Looks for all the links in a file and checks them.

Returns:
@@ -319,14 +324,14 @@ def check_file(self, file: str) -> List[LinkReport]:
@click.option(
"--path",
"-p",
type=click.Path(exists=True, file_okay=True),
type=click.Path(exists=True, file_okay=True, path_type=pathlib.Path),
default=".",
help="Path to markdown file(s) to check",
)
@click.option(
"--docs-root",
"-r",
type=click.Path(exists=True, file_okay=False),
type=click.Path(exists=True, file_okay=False, path_type=pathlib.Path),
default=None,
help="Root to all docs for link checking",
)
@@ -338,32 +343,38 @@ def check_file(self, file: str) -> List[LinkReport]:
)
@click.option("--skip-external", is_flag=True)
def scan_docs_click(
path: str, docs_root: Optional[str], site_prefix: str, skip_external: bool
path: pathlib.Path,
docs_root: Optional[pathlib.Path],
site_prefix: str,
skip_external: bool,
) -> None:
code, message = scan_docs(path, docs_root, site_prefix, skip_external)
click.echo(message)
exit(code)
sys.exit(code)


def scan_docs(
path: str, docs_root: Optional[str], site_prefix: str, skip_external: bool
path: pathlib.Path,
docs_root: pathlib.Path | None,
site_prefix: str,
skip_external: bool,
) -> tuple[int, str]:
if docs_root is None:
if not docs_root:
docs_root = path
elif not os.path.isdir(docs_root): # noqa: PTH112
elif not docs_root.is_dir():
return 1, f"Docs root path: {docs_root} is not a directory"

# prepare our return value
result: List[LinkReport] = list()
result: List[LinkReport] = []
checker = LinkChecker(path, docs_root, site_prefix, skip_external)

if os.path.isdir(path): # noqa: PTH112
if path.is_dir():
# if the path is a directory, get all .md files within it
for file in glob.glob(f"{path}/**/*.md", recursive=True):
for file in path.rglob("*.md"):
report = checker.check_file(file)
if report:
result.extend(report)
elif os.path.isfile(path): # noqa: PTH113
elif path.is_file():
# else we support checking one file at a time
result.extend(checker.check_file(path))
else:
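Not part of the diff, just a minimal sketch of the pathlib idioms this file now leans on (the directory and file names below are hypothetical). It shows why the removed _get_os_path separator translation is unnecessary: the / operator joins segments with the correct separator on every platform, and click.Path(..., path_type=pathlib.Path) already hands the script Path objects instead of strings.

import pathlib

docs_root = pathlib.Path("docs/docusaurus/docs")  # hypothetical docs tree

# "/" joins segments portably, so no "/" -> os.path.sep translation is needed.
md_file = docs_root / "guides" / "setup.md"

print(md_file.is_file())                       # replaces os.path.isfile()
print(md_file.parent)                          # replaces os.path.dirname()
print(md_file.with_suffix(".mdx"))             # swaps the file extension
print((docs_root / "guides" / "setup").resolve())  # absolute, normalized path

# Recursive discovery replaces glob.glob(f"{path}/**/*.md", recursive=True);
# rglob() simply yields nothing here if the directory does not exist.
for path in docs_root.rglob("*.md"):
    print(path.relative_to(docs_root))
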
17 changes: 8 additions & 9 deletions docs/prepare_prior_versions.py
@@ -4,7 +4,6 @@
"""
from __future__ import annotations

import glob
import pathlib
import re

@@ -21,7 +20,7 @@ def change_paths_for_docs_file_references(verbose: bool = False) -> None:
snippets only for v0.15.50 and later.
"""
path = _docs_dir() / "docusaurus/versioned_docs/version-0.14.13/"
files = glob.glob(f"{path}/**/*.md", recursive=True)
files = list(path.rglob("*.md"))
pattern = re.compile(r"((.*)(file *= *)((../)*))(.*)")
path_to_insert = "versioned_code/version-0.14.13/"

@@ -88,9 +87,9 @@ def prepend_version_info_to_name_for_snippet_by_name_references(
)
for path in paths:
version = path.name
files = []
files: list[pathlib.Path] = []
for extension in (".md", ".mdx", ".py", ".yml", ".yaml"):
files.extend(glob.glob(f"{path}/**/*{extension}", recursive=True))
files.extend(path.rglob(f"*{extension}"))
print(
f" Processing {len(files)} files for path {path} in prepend_version_info_to_name_for_snippet_by_name_references..."
)
@@ -129,9 +128,9 @@ def prepend_version_info_to_name_for_href_absolute_links(verbose: bool = False)
if not version_only:
raise ValueError("Path does not contain a version number")

files = []
files: list[pathlib.Path] = []
for extension in (".md", ".mdx"):
files.extend(glob.glob(f"{path}/**/*{extension}", recursive=True))
files.extend(path.rglob(f"**/*{extension}"))
print(
f" Processing {len(files)} files for path {path} in prepend_version_info_to_name_for_href_absolute_links..."
)
@@ -345,9 +344,9 @@ def prepend_version_info_to_name_for_md_relative_links(verbose: bool = False) ->
if not version_only:
raise ValueError("Path does not contain a version number")

files = []
files: list[pathlib.Path] = []
for extension in (".md", ".mdx"):
files.extend(glob.glob(f"{path}/**/*{extension}", recursive=True))
path.rglob(f"**/*{extension}")
print(
f" Processing {len(files)} files for path {path} in {method_name_for_logging}..."
)
@@ -357,7 +356,7 @@ def prepend_version_info_to_name_for_md_relative_links(verbose: bool = False) ->
"_data_docs_build_and_view.md",
"_checkpoint_create_and_run.md",
]
files = [file for file in files if file.split("/")[-1] in files_to_process]
files = [file for file in files if file.name in files_to_process]
for file_path in files:
with open(file_path, "r+") as f:
contents = f.read()
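A hedged aside on the glob-to-rglob conversion above (the directory below is a stand-in): Path.rglob(pattern) already recurses, so the bare *{extension} pattern is sufficient, and because it yields Path objects the final filename filter can use file.name instead of splitting on "/".

import pathlib

# Stand-in for one of the versioned docs directories processed above.
path = pathlib.Path("docs/docusaurus/versioned_docs/version-0.14.13")

files: list[pathlib.Path] = []
for extension in (".md", ".mdx"):
    # rglob() is recursive on its own; the "**/*{extension}" form used in a
    # couple of hunks above matches the same files, just with a redundant "**".
    files.extend(path.rglob(f"*{extension}"))

files_to_process = {"_data_docs_build_and_view.md", "_checkpoint_create_and_run.md"}
selected = [f for f in files if f.name in files_to_process]
print(f"Selected {len(selected)} of {len(files)} files")
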
12 changes: 12 additions & 0 deletions docs/ruff.toml
Comment (Member Author): If we want different linting rules for the docs code, we can customize them in this file. (A short illustration of the PLR2004 ignore follows the file contents below.)

@@ -0,0 +1,12 @@
# docs/ specific ruff linter overrides

# root linter settings are defined in the file below
extend = "../pyproject.toml"

extend-ignore = [
# https://docs.astral.sh/ruff/rules/magic-value-comparison/
"PLR2004", # can be tedious and overly verbose in docs
]

[isort]
known-first-party = ["great_expectations", "tests"]
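
To illustrate the comment above (my example, not from the PR): PLR2004 flags comparisons against bare literal values, which is exactly what tutorial-style docs snippets do all the time, so ignoring it only under docs/ quiets that noise, while the extend = "../pyproject.toml" line keeps every other rule aligned with the repository-wide settings. A hypothetical snippet that would otherwise trip the rule:

# Hypothetical docs-style snippet; the dictionary stands in for a real result object.
results = {"evaluated_expectations": 3, "successful_expectations": 3}

# PLR2004 would ask for named constants instead of the bare 3s below, which is
# reasonable in library code but adds clutter to a short tutorial example.
if results["evaluated_expectations"] == 3 and results["successful_expectations"] == 3:
    print("all three expectations passed")
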
15 changes: 10 additions & 5 deletions docs/sphinx_api_docs_source/build_sphinx_api_docs.py
@@ -165,7 +165,7 @@ def _create_mdx_files_for_docusaurus_from_sphinx_html(self) -> None:
# Read the generated html and process the content for conversion to mdx
# Write out to .mdx file using the relative file directory structure
for html_file_path in self._get_generated_html_file_paths():
logger.debug(f"Processing: {str(html_file_path.absolute())}")
logger.debug(f"Processing: {html_file_path.absolute()!s}")
with open(html_file_path.absolute()) as f:
html_file_contents = f.read()
doc_str = self._parse_and_process_html_to_mdx(
@@ -177,7 +177,7 @@ def _create_mdx_files_for_docusaurus_from_sphinx_html(self) -> None:
sidebar_entry=self._get_sidebar_entry(html_file_path=html_file_path)
)
output_path.parent.mkdir(parents=True, exist_ok=True)
logger.debug(f"Writing out mdx file: {str(output_path.absolute())}")
logger.debug(f"Writing out mdx file: {output_path.absolute()!s}")
with open(output_path, "w") as fout:
fout.write(doc_str)

@@ -207,7 +207,7 @@ def _parse_and_process_html_to_mdx(
Returns:
Content suitable for use in a docusaurus mdx file.
"""
from bs4 import (
from bs4 import ( # noqa: I001
BeautifulSoup,
) # Importing here since it is not a library requirement

@@ -234,7 +234,9 @@ def _parse_and_process_html_to_mdx(
doc["class"] = "sphinx-api-doc"

# Change style to styles to avoid rendering errors
tags_with_style = doc.find_all("col", style=lambda value: value in value)
tags_with_style = doc.find_all(
"col", style=lambda value: value in value # noqa: PLR0124
)
for tag in tags_with_style:
style = tag["style"]
del tag["style"]
@@ -592,7 +594,10 @@ def _clean_up_code_blocks(self, doc: str) -> str:
"""
doc = doc.replace("&lt;", "<").replace("&gt;", ">")
doc = (
doc.replace("“", '"').replace("”", '"').replace("‘", "'").replace("’", "'")
doc.replace("“", '"')
.replace("”", '"')
.replace("‘", "'") # noqa: RUF001
.replace("’", "'") # noqa: RUF001
)
doc = doc.replace("<cite>{", "`").replace("}</cite>", "`")
doc = doc.replace("${", r"\${")
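Two of the mechanical fixes above in one small sketch (the path and strings are made up): the !s conversion flag is, as I read it, the explicit form ruff's RUF010 rule prefers over wrapping a value in str() inside an f-string, and the split-up replace() chain normalizes typographic quotes coming back from Sphinx-rendered HTML (the RUF001 noqas exist because those literals contain ambiguous Unicode characters on purpose).

import pathlib

output_path = pathlib.Path("api_docs/Checkpoint.mdx")  # hypothetical output file

# Equivalent to f"...{str(output_path.absolute())}", written with a conversion flag.
message = f"Writing out mdx file: {output_path.absolute()!s}"
print(message)

# Normalizing curly quotes copied out of rendered HTML into plain ASCII quotes.
doc = "“smart” quotes and ‘apostrophes’"
doc = (
    doc.replace("“", '"')
    .replace("”", '"')
    .replace("‘", "'")
    .replace("’", "'")
)
print(doc)
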
3 changes: 2 additions & 1 deletion docs/sphinx_api_docs_source/check_public_api_docstrings.py
@@ -111,6 +111,7 @@ def run_ruff(paths: List[pathlib.Path]) -> List[str]:
cmds,
capture_output=True,
text=True,
check=False,
)

# Check to make sure `ruff` actually ran
@@ -124,7 +125,7 @@

def _log_with_timestamp(content: str) -> None:
"""Log content with timestamp appended."""
timestamp = datetime.datetime.now()
timestamp = datetime.datetime.now() # noqa: DTZ005
timestamp_str = timestamp.strftime("%H:%M:%S")
logger.debug(f"{content} Timestamp: {timestamp_str}")

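A closing sketch of the two lint findings handled in this last file (the subprocess command is a stand-in, not the script's real ruff invocation): check=False states explicitly that a non-zero exit should not raise, since the caller inspects the result itself, and DTZ005 flags naive datetime.now(), which the script keeps behind a noqa; the timezone-aware spelling would look like this.

import datetime
import subprocess
import sys

# check=False: a non-zero exit (for example, ruff reporting violations) should
# not raise; the caller reads returncode and stdout on its own.
proc = subprocess.run(
    [sys.executable, "-c", "print('pretend ruff output')"],  # stand-in command
    capture_output=True,
    text=True,
    check=False,
)
print(proc.returncode, proc.stdout.strip())

# DTZ005-clean alternative to the noqa'd datetime.datetime.now().
timestamp = datetime.datetime.now(tz=datetime.timezone.utc)
print(f"Timestamp: {timestamp.strftime('%H:%M:%S')}")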