diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index a2b7339417..69f7f9a3d0 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -98,4 +98,4 @@ jobs:
       - name: Build documentation
         run: |
           git fetch origin gh-pages
-          mike deploy --push --no-redirect --update-aliases $GITHUB_REF_NAME latest
+          mike deploy --push --alias-type=copy --update-aliases $GITHUB_REF_NAME latest
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 6b4066b82d..ff6662d46c 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -128,7 +128,7 @@ jobs:
 
       - name: Build documentation
         run: |
-          mike deploy --no-redirect --rebase --update-aliases $BRANCH_NAME latest
+          mike deploy --alias-type=copy --update-aliases $BRANCH_NAME latest
           mike set-default $BRANCH_NAME
 
       - name: Put content of gh-pages to public folder
diff --git a/docs/assets/overrides/main.html b/docs/assets/overrides/main.html
index c921ab3f7b..586ec84ab6 100644
--- a/docs/assets/overrides/main.html
+++ b/docs/assets/overrides/main.html
@@ -1,5 +1,12 @@
 {% extends "base.html" %}
 
+{% block scripts %}
+
+  {{ super() }}
+
+{% endblock %}
+
+
 {% block announce %}
   Check out the new span classifier training tutorial !
 {% endblock %}
diff --git a/docs/assets/stylesheets/cards.css b/docs/assets/stylesheets/cards.css
deleted file mode 100644
index 2a66d0bef7..0000000000
--- a/docs/assets/stylesheets/cards.css
+++ /dev/null
@@ -1,46 +0,0 @@
-.md-typeset .card-set {
-    grid-gap: .4rem;
-    display: grid;
-    grid-template-columns: repeat(auto-fit,minmax(16rem,1fr));
-    margin: 1em 0
-}
-
-.md-typeset .card-set>a.card-content {
-    color: initial;
-}
-
-.md-typeset .card-set>.card-content,.md-typeset .card-set>.card-content,.md-typeset .grid>.card {
-    border: .05rem solid var(--md-default-fg-color--lightest);
-    border-radius: .1rem;
-    display: block;
-    margin: 0;
-    padding: .8rem;
-    transition: border .25s,box-shadow .25s
-}
-
-.md-typeset .card-set>.card-content:focus-within,.md-typeset .card-set>.card-content:hover,.md-typeset .card-set>.card-content:focus-within,.md-typeset .card-set>.card-content:hover,.md-typeset .grid>.card:focus-within,.md-typeset .grid>.card:hover {
-    border-color: #0000;
-    box-shadow: var(--md-shadow-z2)
-}
-
-.md-typeset .card-set>.card-content>hr,.md-typeset .card-set>.card-content>hr,.md-typeset .grid>.card>hr {
-    margin-bottom: 1em;
-    margin-top: 1em
-}
-
-.md-typeset .card-set>.card-content>:first-child,.md-typeset .card-set>.card-content>:first-child,.md-typeset .grid>.card>:first-child {
-    margin-top: 0
-}
-
-.md-typeset .card-set>.card-content>:last-child,.md-typeset .card-set>.card-content>:last-child,.md-typeset .grid>.card>:last-child {
-    margin-bottom: 0
-}
-
-.md-typeset .card-set>*,.md-typeset .card-set>.admonition,.md-typeset .card-set>.highlight>*,.md-typeset .card-set>.highlighttable,.md-typeset .card-set>.md-typeset details,.md-typeset .card-set>details,.md-typeset .card-set>pre {
-    margin-bottom: 0;
-    margin-top: 0
-}
-
-.md-typeset .card-set>.highlight>pre:only-child,.md-typeset .card-set>.highlight>pre>code,.md-typeset .card-set>.highlighttable,.md-typeset .card-set>.highlighttable>tbody,.md-typeset .card-set>.highlighttable>tbody>tr,.md-typeset .card-set>.highlighttable>tbody>tr>.code,.md-typeset .card-set>.highlighttable>tbody>tr>.code>.highlight,.md-typeset .card-set>.highlighttable>tbody>tr>.code>.highlight>pre,.md-typeset .card-set>.highlighttable>tbody>tr>.code>.highlight>pre>code {
-    height: 100%
-}
diff --git a/docs/assets/stylesheets/extra.css b/docs/assets/stylesheets/extra.css
index 90719615e9..30ebe620b4 100644
--- a/docs/assets/stylesheets/extra.css
+++ b/docs/assets/stylesheets/extra.css
@@ -53,6 +53,7 @@
 }
 
 html {
+    font-size: 100% !important;
 }
 
 body, input {
@@ -65,7 +66,7 @@ body, input {
     word-wrap: normal;
     color: var(--md-typeset-color);
     font-weight: 600;
-    scroll-margin-top: 1.25rem;
+    scroll-margin-top: 1.5625rem;
     letter-spacing: 0;
 }
@@ -74,26 +75,17 @@ body, input {
 }
 
 .md-nav {
-    font-size: 0.8rem;
+    font-size: 1rem;
 }
 
 .md-typeset code {
     font-size: 0.95em;
 }
 
-.md-typeset pre > code, .termy > [data-termynal], .highlighttable .linenos {
-    font-size: .75rem;
-}
-
-.termy > [data-termynal] {
-    font-size: 0.8rem;
-    font-family: var(--md-code-font);
-    padding: 45px 45px 25px;
+.md-typeset pre > code, .highlighttable .linenos {
+    font-size: .9375rem;
 }
 
-.termy > [data-termynal] {
-
-}
 
 .md-typeset :is(.admonition,details) {
     font-size: inherit !important;
@@ -125,21 +117,21 @@ body, input {
 
 @media screen and (min-width: 76.1875em) {
     .md-sidebar {
-        margin-top: 1.5rem;
+        margin-top: 1.875rem;
     }
 }
 
 @media screen and (min-width: 60em) {
     .md-nav--secondary .md-nav__title {
         background: var(--md-main-bg) !important;
-        box-shadow: 0 0 0.4rem 0.4rem var(--md-main-bg) !important;
+        box-shadow: 0 0 0.5rem 0.5rem var(--md-main-bg) !important;
     }
 }
 
 @media screen and (min-width: 76.25em) {
     .md-nav--primary .md-nav__title, .md-nav--secondary .md-nav__title, .md-nav--lifted > .md-nav__list > .md-nav__item--active > .md-nav__link {
         background: var(--md-main-bg) !important;
-        box-shadow: 0 0 0.4rem 0.4rem var(--md-main-bg) !important;
+        box-shadow: 0 0 0.5rem 0.5rem var(--md-main-bg) !important;
     }
 }
@@ -152,7 +144,7 @@ body, input {
 }
 
 .md-content__inner {
-    margin-top: 1.5rem;
+    margin-top: 1.875rem;
 }
 
 .doc td > code {
@@ -160,7 +152,7 @@ body, input {
 }
 
 .md-typeset .card-set {
-    grid-template-columns: repeat(auto-fit,minmax(12rem,1fr)) !important;
+    grid-template-columns: repeat(auto-fit,minmax(15rem,1fr)) !important;
 }
 
 .md-typeset code a:not(.md-annotation__index) {
@@ -187,26 +179,8 @@ a.discrete-link {
 }
 
 .sourced-heading > a {
-    font-size: 1rem;
+    font-size: 1.25rem;
     align-content: center;
-    white-space: nowrap;
-}
-
-.doc-param-details .subdoc {
-    padding: 0;
-    box-shadow: none;
-    border-color: var(--md-typeset-table-color);
-}
-
-.doc-param-details .subdoc > div > div > div> table {
-    padding: 0;
-    box-shadow: none;
-    border: none;
-}
-
-.doc-param-details .subdoc > summary {
-    margin: 0;
-    font-weight: normal;
 }
 
 /*.chip {
@@ -306,3 +280,11 @@ a.discrete-link {
     color: #bababa;
     white-space: nowrap;
 }
+
+.md-content {
+    contain: paint;
+}
+
+.hidden {
+    display: none !important;
+}
diff --git a/docs/assets/templates/python/material/class.html b/docs/assets/templates/python/material/class.html
deleted file mode 100644
index ab982e5aea..0000000000
--- a/docs/assets/templates/python/material/class.html
+++ /dev/null
@@ -1,127 +0,0 @@
-
-{% with html_id = class.path %} - - {% if config.only_parameters or config.only_class_level %} - - {% with docstring_sections = class.docstring.parsed %} - {% include "docstring.html" with context %} - {% endwith %} - - {% if config.merge_init_into_class %} - {% if "__init__" in class.members and class.members["__init__"].has_docstring %} - {% with docstring_sections = class.members["__init__"].docstring.parsed %} - {% include "docstring.html" with context %} - {% endwith %} - {% endif %} - {% endif %} - - {% else %} - - {% if root %} - {% set show_full_path = config.show_root_full_path %} - {% set root_members = True %} - {% elif root_members %} - {% set show_full_path = config.show_root_members_full_path or config.show_object_full_path %} - {% set root_members = False %} - {% else %} - {% set show_full_path = config.show_object_full_path %} - {% endif %} - - {% if not root or config.show_root_heading %} - - {% filter heading(heading_level, - role="class", - id=html_id, - class="doc doc-heading", - toc_label=class.name) %} - - {% if config.separate_signature %} - {% if show_full_path %}{{ class.path }}{% else %}{{ class.name }}{% endif %} - {% elif config.merge_init_into_class and "__init__" in class.members -%} - {%- with function = class.members["__init__"] -%} - {%- filter highlight(language="python", inline=True) -%} - {% if show_full_path %}{{ class.path }}{% else %}{{ class.name }}{% endif %} - {%- include "signature.html" with context -%} - {%- endfilter -%} - {%- endwith -%} - {% else %} - {% if show_full_path %}{{ class.path }}{% else %}{{ class.name }}{% endif %} - {% endif %} - - {% with labels = class.labels %} - {% include "labels.html" with context %} - {% endwith %} - - {% endfilter %} - - {% if config.separate_signature and config.merge_init_into_class %} - {% if "__init__" in class.members %} - {% with function = class.members["__init__"] %} - {% filter format_signature(function, config.line_length, crossrefs=config.signature_crossrefs) %} - {% if show_full_path %}{{ class.path }}{% else %}{{ class.name }}{% endif %} - {% endfilter %} - {% endwith %} - {% endif %} - {% endif %} - - {% else %} - {% if config.show_root_toc_entry %} - {% filter heading(heading_level, - role="class", - id=html_id, - toc_label=class.path if config.show_root_full_path else class.name, - hidden=True) %} - {% endfilter %} - {% endif %} - {% set heading_level = heading_level - 1 %} - {% endif %} - -
- {% if config.show_bases and class.bases %} -

- Bases: {% for expression in class.bases -%} - {% include "expression.html" with context %}{% if not loop.last %}, {% endif %} - {% endfor -%} -

- {% endif %} - - {% with docstring_sections = class.docstring.parsed %} - {% with is_merged_init = True %} - {% include "docstring.html" with context %} - {% endwith %} - {% endwith %} - - {% if config.merge_init_into_class %} - {% if "__init__" in class.members and class.members["__init__"].has_docstring %} - {% with docstring_sections = class.members["__init__"].docstring.parsed, is_merged_init = True %} - {% include "docstring.html" with context %} - {% endwith %} - {% endif %} - {% endif %} - - {% if config.show_source %} - {% if config.merge_init_into_class %} - {% if "__init__" in class.members and class.members["__init__"].source %} -
- Source code in {{ class.relative_filepath }} - {{ class.members["__init__"].source|highlight(language="python", linestart=class.members["__init__"].lineno, linenums=True) }} -
- {% endif %} - {% elif class.source %} -
- Source code in {{ class.relative_filepath }} - {{ class.source|highlight(language="python", linestart=class.lineno, linenums=True) }} -
- {% endif %} - {% endif %} - - {% with obj = class %} - {% set root = False %} - {% set heading_level = heading_level + 1 %} - {% include "children.html" with context %} - {% endwith %} -
- {% endif %} - -{% endwith %} -
diff --git a/docs/assets/templates/python/material/docstring.html b/docs/assets/templates/python/material/docstring.html deleted file mode 100644 index c34e8f0f81..0000000000 --- a/docs/assets/templates/python/material/docstring.html +++ /dev/null @@ -1,34 +0,0 @@ -{% if docstring_sections %} - {{ log.debug("Rendering docstring") }} - {% for section in docstring_sections %} - {% if not config.only_parameters %} - {% if section.kind.value == "text" and (not config.sections or "text" in config.sections) %} - {{ section.value|convert_markdown(heading_level - 1, html_id) }} - {% elif section.kind.value == "attributes" and (not config.sections or "attributes" in config.sections) %} - {% include "docstring/attributes.html" with context %} - {% elif section.kind.value == "parameters" and (not config.sections or "parameters" in config.sections) %} - {% include "docstring/parameters.html" with context %} - {% elif section.kind.value == "other parameters" and (not config.sections or "parameters" in config.sections) %} - {% include "docstring/other_parameters.html" with context %} - {% elif section.kind.value == "raises" and (not config.sections or "raises" in config.sections) %} - {% include "docstring/raises.html" with context %} - {% elif section.kind.value == "warns" and (not config.sections or "warns" in config.sections) %} - {% include "docstring/warns.html" with context %} - {% elif section.kind.value == "yields" and (not config.sections or "yields" in config.sections) %} - {% include "docstring/yields.html" with context %} - {% elif section.kind.value == "receives" and (not config.sections or "receives" in config.sections) %} - {% include "docstring/receives.html" with context %} - {% elif section.kind.value == "returns" and (not config.sections or "returns" in config.sections) %} - {% include "docstring/returns.html" with context %} - {% elif section.kind.value == "examples" and (not config.sections or "examples" in config.sections) %} - {% include "docstring/examples.html" with context %} - {% elif section.kind.value == "admonition" and (not config.sections or "admonition" in config.sections) %} - {% include "docstring/admonition.html" with context %} - {% endif %} - {% elif section.kind.value == "parameters" %} - {% include "docstring/parameters.html" with context %} - {% elif section.kind.value == "attributes" %} - {% include "docstring/attributes.html" with context %} - {% endif %} - {% endfor %} -{% endif %} diff --git a/docs/assets/templates/python/material/docstring/examples.html b/docs/assets/templates/python/material/docstring/examples.html deleted file mode 100644 index 394e085ce8..0000000000 --- a/docs/assets/templates/python/material/docstring/examples.html +++ /dev/null @@ -1,8 +0,0 @@ -{{ "# Examples\n"|convert_markdown(heading_level, html_id) }} -{% for section_type, sub_section in section.value %} - {% if section_type.value == "text" %} - {{ sub_section|convert_markdown(heading_level, html_id) }} - {% elif section_type.value == "examples" %} - {{ sub_section|convert_markdown(heading_level, html_id) }} - {% endif %} -{% endfor %} diff --git a/docs/assets/templates/python/material/docstring/parameters.html b/docs/assets/templates/python/material/docstring/parameters.html deleted file mode 100644 index 6e6eb63bfd..0000000000 --- a/docs/assets/templates/python/material/docstring/parameters.html +++ /dev/null @@ -1,104 +0,0 @@ -{% if config.only_parameters != "no-header" and config.header != false %} -{{ "# Parameters\n"|convert_markdown(heading_level, html_id) }} -{% endif %} -{% 
if config.docstring_section_style == "table" %} - {% block table_style %} -

{{ section.title or "Parameters:" }}

- - - - - - - - - - - {% for parameter in section.value %} - {% if not ((config.only_parameters or config.skip_parameters is defined) and parameter.name in (config.skip_parameters if config.skip_parameters is defined else ("nlp", "name", "vocab", "scorer"))) %} - - - - - - - {% endif %} - {% endfor %} - -
Name Type Description Default
{{ parameter.name }} - {% if parameter.annotation %} - {% with expression = parameter.annotation %} - {% include "expression.html" with context %} - {% endwith %} - {% endif %} - {{ parameter.description|convert_markdown(heading_level, html_id) }} - {% if parameter.default %} - {% with expression = parameter.default %} - {% include "expression.html" with context %} - {% endwith %} - {% else %} - required - {% endif %} -
- {% endblock table_style %} -{% elif config.docstring_section_style == "list" %} - {% block list_style %} -

{{ section.title or "Parameters:" }}

- - {% endblock list_style %} -{% elif config.docstring_section_style == "spacy" %} - {% block spacy_style %} - - - - - - - - - {% for parameter in section.value %} - {% if not ((config.only_parameters or config.skip_parameters is defined) and parameter.name in (config.skip_parameters if config.skip_parameters is defined else ("nlp", "name", "vocab", "scorer"))) %} - - - - - {% endif %} - {% endfor %} - -
{{ (section.title or "PARAMETER").rstrip(":").upper() }}DESCRIPTION
{{ parameter.name }} - {{ parameter.description|convert_markdown(heading_level, html_id) }} -

- {% if parameter.annotation %} - - TYPE: - {% with expression = parameter.annotation %} - {% include "expression.html" with context %} - {% endwith %} - - {% endif %} - {% if parameter.default %} - - DEFAULT: - {% with expression = parameter.default %} - {% include "expression.html" with context %} - {% endwith %} - - {% endif %} -

-
- {% endblock spacy_style %} -{% endif %} diff --git a/docs/assets/templates/python/material/function.html b/docs/assets/templates/python/material/function.html deleted file mode 100644 index 54fa6fac1c..0000000000 --- a/docs/assets/templates/python/material/function.html +++ /dev/null @@ -1,74 +0,0 @@ -{{ log.debug("Rendering " + function.path) }} - -
-{% with html_id = function.path %} - - {% if root %} - {% set show_full_path = config.show_root_full_path %} - {% set root_members = True %} - {% elif root_members %} - {% set show_full_path = config.show_root_members_full_path or config.show_object_full_path %} - {% set root_members = False %} - {% else %} - {% set show_full_path = config.show_object_full_path %} - {% endif %} - - {% if not root or config.show_root_heading %} - - {% filter heading(heading_level, - role="function", - id=html_id, - class="doc doc-heading", - toc_label=function.name ~ "()") %} - - {% if config.separate_signature %} - {% if show_full_path %}{{ function.path }}{% else %}{{ function.name }}{% endif %} - {% else %} - {% filter highlight(language="python", inline=True) %} - {% if show_full_path %}{{ function.path }}{% else %}{{ function.name }}{% endif %} - {% include "signature.html" with context %} - {% endfilter %} - {% endif %} - - {% with labels = function.labels %} - {% include "labels.html" with context %} - {% endwith %} - - {% endfilter %} - - {% if config.separate_signature %} - {% filter highlight(language="python", inline=False) %} - {% filter format_signature(config.line_length) %} - {% if show_full_path %}{{ function.path }}{% else %}{{ function.name }}{% endif %} - {% include "signature.html" with context %} - {% endfilter %} - {% endfilter %} - {% endif %} - - {% else %} - {% if config.show_root_toc_entry %} - {% filter heading(heading_level, - role="function", - id=html_id, - toc_label=function.path if config.show_root_full_path else function.name, - hidden=True) %} - {% endfilter %} - {% endif %} - {% set heading_level = heading_level - 1 %} - {% endif %} - -
- {% with docstring_sections = function.docstring.parsed %} - {% include "docstring.html" with context %} - {% endwith %} - - {% if not config.only_parameters and config.show_source and function.source %} -
- Source code in {{ function.relative_filepath }} - {{ function.source|highlight(language="python", linestart=function.lineno, linenums=True) }} -
- {% endif %} -
- -{% endwith %} -
diff --git a/docs/index.md b/docs/index.md
index 69962fab7d..3e6152a4ab 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -67,7 +67,11 @@ This example is complete, it should run as-is.
 
 To learn more about EDS-NLP, we have prepared a series of tutorials that should cover the main features of the library.
 
---8<-- "docs/tutorials/index.md:tutorials"
+--8<-- "docs/tutorials/index.md:classic-tutorials"
+
+We also provide tutorials on how to train deep-learning models with EDS-NLP. These tutorials cover the training API, hyperparameter tuning, and more.
+
+--8<-- "docs/tutorials/index.md:deep-learning-tutorials"
 
 ## Available pipeline components
diff --git a/docs/scripts/autorefs/LICENSE b/docs/scripts/autorefs/LICENSE
deleted file mode 100644
index 15b59d08a1..0000000000
--- a/docs/scripts/autorefs/LICENSE
+++ /dev/null
@@ -1,16 +0,0 @@
-ISC License
-
-Copyright (c) 2019, Oleh Prypin
-Copyright (c) 2019, Timothée Mazzucotelli
-
-Permission to use, copy, modify, and/or distribute this software for any
-purpose with or without fee is hereby granted, provided that the above
-copyright notice and this permission notice appear in all copies.
-
-THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/docs/scripts/autorefs/plugin.py b/docs/scripts/autorefs/plugin.py
deleted file mode 100644
index 230f95b2c6..0000000000
--- a/docs/scripts/autorefs/plugin.py
+++ /dev/null
@@ -1,470 +0,0 @@
-# ruff: noqa: E501
-"""
-# -----------
-VENDORED https://github.com/mkdocstrings/autorefs/blob/e19b9fa47dac136a529c2be0d7969106ca5d5106/src/mkdocs_autorefs/
-Waiting for the following PR to be merged: https://github.com/mkdocstrings/autorefs/pull/25
-# -----------
-
-This module contains the "mkdocs-autorefs" plugin.
-
-After each page is processed by the Markdown converter, this plugin stores absolute URLs of every HTML anchors
-it finds to later be able to fix unresolved references.
-It stores them during the [`on_page_content` event hook](https://www.mkdocs.org/user-guide/plugins/#on_page_content).
-
-Just before writing the final HTML to the disc, during the
-[`on_post_page` event hook](https://www.mkdocs.org/user-guide/plugins/#on_post_page),
-this plugin searches for references of the form `[identifier][]` or `[title][identifier]` that were not resolved,
-and fixes them using the previously stored identifier-URL mapping.
-""" -import contextlib -import functools -import logging -import os -import re -from html import escape, unescape -from typing import Any, Callable, Dict, List, Match, Optional, Sequence, Tuple, Union -from urllib.parse import urlsplit -from xml.etree.ElementTree import Element - -from markdown import Markdown -from markdown.extensions import Extension -from markdown.inlinepatterns import REFERENCE_RE, ReferenceInlineProcessor -from markdown.util import INLINE_PLACEHOLDER_RE -from mkdocs.config import Config -from mkdocs.config import config_options as c -from mkdocs.plugins import BasePlugin -from mkdocs.structure.pages import Page -from mkdocs.structure.toc import AnchorLink -from mkdocs.utils import warning_filter - -AUTO_REF_RE = re.compile( - r"autorefs-identifier|autorefs-optional|autorefs-optional-hover)=" - r'("?)(?P[^"<>]*)\2>(?P.*?)</span>' -) -"""A regular expression to match mkdocs-autorefs' special reference markers -in the [`on_post_page` hook][mkdocs_autorefs.plugin.AutorefsPlugin.on_post_page]. -""" - -EvalIDType = Tuple[Any, Any, Any] - - -class AutoRefInlineProcessor(ReferenceInlineProcessor): - """A Markdown extension.""" - - def __init__(self, *args, **kwargs): # noqa: D107 - super().__init__(REFERENCE_RE, *args, **kwargs) - - # Code based on - # https://github.com/Python-Markdown/markdown/blob/8e7528fa5c98bf4652deb13206d6e6241d61630b/markdown/inlinepatterns.py#L780 - - def handleMatch(self, m, data) -> Union[Element, EvalIDType]: # type: ignore[override] # noqa: N802,WPS111 - """Handle an element that matched. - - Arguments: - m: The match object. - data: The matched data. - - Returns: - A new element or a tuple. - """ - text, index, handled = self.getText(data, m.end(0)) - if not handled: - return None, None, None - - identifier, end, handled = self.evalId(data, index, text) - if not handled: - return None, None, None - - if re.search(r"[/ \x00-\x1f]", identifier): - # Do nothing if the matched reference contains: - # - a space, slash or control character (considered unintended); - # - specifically \x01 is used by Python-Markdown HTML stash when there's inline formatting, - # but references with Markdown formatting are not possible anyway. - return None, m.start(0), end - - return self.makeTag(identifier, text), m.start(0), end - - def evalId( - self, data: str, index: int, text: str - ) -> EvalIDType: # noqa: N802 (parent's casing) - """Evaluate the id portion of `[ref][id]`. - - If `[ref][]` use `[ref]`. - - Arguments: - data: The data to evaluate. - index: The starting position. - text: The text to use when no identifier. - - Returns: - A tuple containing the identifier, its end position, and whether it matched. - """ - m = self.RE_LINK.match(data, pos=index) # noqa: WPS111 - if not m: - return None, index, False - - identifier = m.group(1) - if not identifier: - identifier = text - # Allow the entire content to be one placeholder, with the intent of catching things like [`Foo`][]. - # It doesn't catch [*Foo*][] though, just due to the priority order. - # https://github.com/Python-Markdown/markdown/blob/1858c1b601ead62ed49646ae0d99298f41b1a271/markdown/inlinepatterns.py#L78 - if INLINE_PLACEHOLDER_RE.fullmatch(identifier): - identifier = self.unescape(identifier) - - end = m.end(0) - return identifier, end, True - - def makeTag(self, identifier: str, text: str) -> Element: # type: ignore[override] # noqa: N802,W0221 - """Create a tag that can be matched by `AUTO_REF_RE`. - - Arguments: - identifier: The identifier to use in the HTML property. 
- text: The text to use in the HTML tag. - - Returns: - A new element. - """ - el = Element("span") - el.set("data-autorefs-identifier", identifier) - el.text = text - return el - - -def relative_url(url_a: str, url_b: str) -> str: - """Compute the relative path from URL A to URL B. - - Arguments: - url_a: URL A. - url_b: URL B. - - Returns: - The relative URL to go from A to B. - """ - parts_a = url_a.split("/") - url_b, anchor = url_b.split("#", 1) - parts_b = url_b.split("/") - - # remove common left parts - while parts_a and parts_b and parts_a[0] == parts_b[0]: - parts_a.pop(0) - parts_b.pop(0) - - # go up as many times as remaining a parts' depth - levels = len(parts_a) - 1 - parts_relative = [".."] * levels + parts_b # noqa: WPS435 - relative = "/".join(parts_relative) - return f"{relative}#{anchor}" - - -def fix_ref( - url_mapper: Callable[[str], str], unmapped: List[str] -) -> Callable: # noqa: WPS212,WPS231 - """Return a `repl` function for [`re.sub`](https://docs.python.org/3/library/re.html#re.sub). - - In our context, we match Markdown references and replace them with HTML links. - - When the matched reference's identifier was not mapped to an URL, we append the identifier to the outer - `unmapped` list. It generally means the user is trying to cross-reference an object that was not collected - and rendered, making it impossible to link to it. We catch this exception in the caller to issue a warning. - - Arguments: - url_mapper: A callable that gets an object's site URL by its identifier, - such as [mkdocs_autorefs.plugin.AutorefsPlugin.get_item_url][]. - unmapped: A list to store unmapped identifiers. - - Returns: - The actual function accepting a [`Match` object](https://docs.python.org/3/library/re.html#match-objects) - and returning the replacement strings. - """ - - def inner(match: Match): # noqa: WPS212,WPS430 - identifier = match["identifier"] - title = match["title"] - kind = match["kind"] - - try: - url = url_mapper(unescape(identifier)) - except KeyError: - if kind == "autorefs-optional": - return title - elif kind == "autorefs-optional-hover": - return f'<span title="{identifier}">{title}</span>' - unmapped.append(identifier) - if title == identifier: - return f"[{identifier}][]" - return f"[{title}][{identifier}]" - - parsed = urlsplit(url) - external = parsed.scheme or parsed.netloc - classes = ["autorefs", "autorefs-external" if external else "autorefs-internal"] - class_attr = " ".join(classes) - if kind == "autorefs-optional-hover": - return f'<a class="{class_attr}" title="{identifier}" href="{escape(url)}">{title}</a>' - return f'<a class="{class_attr}" href="{escape(url)}">{title}</a>' - - return inner - - -def fix_refs(html: str, url_mapper: Callable[[str], str]) -> Tuple[str, List[str]]: - """Fix all references in the given HTML text. - - Arguments: - html: The text to fix. - url_mapper: A callable that gets an object's site URL by its identifier, - such as [mkdocs_autorefs.plugin.AutorefsPlugin.get_item_url][]. - - Returns: - The fixed HTML. - """ - unmapped = [] # type: ignore - html = AUTO_REF_RE.sub(fix_ref(url_mapper, unmapped), html) - return html, unmapped - - -class AutorefsExtension(Extension): - """Extension that inserts auto-references in Markdown.""" - - def extendMarkdown( - self, md: Markdown - ) -> None: # noqa: N802 (casing: parent method's name) - """Register the extension. - - Add an instance of our [`AutoRefInlineProcessor`][mkdocs_autorefs.references.AutoRefInlineProcessor] to the Markdown parser. 
- - Arguments: - md: A `markdown.Markdown` instance. - """ - md.inlinePatterns.register( - AutoRefInlineProcessor(md), - "mkdocs-autorefs", - priority=168, # noqa: WPS432 # Right after markdown.inlinepatterns.ReferenceInlineProcessor - ) - - -log = logging.getLogger(f"mkdocs.plugins.{__name__}") -log.addFilter(warning_filter) - - -class AutorefsPlugin(BasePlugin): - """An `mkdocs` plugin. - - This plugin defines the following event hooks: - - - `on_config` - - `on_page_content` - - `on_post_page` - - Check the [Developing Plugins](https://www.mkdocs.org/user-guide/plugins/#developing-plugins) page of `mkdocs` - for more information about its plugin system. - """ - - scan_toc: bool = True - current_page: Optional[str] = None - config_scheme = (("priority", c.ListOfItems(c.Type(str), default=[])),) - - def __init__(self) -> None: - """Initialize the object.""" - super().__init__() - self._url_map: Dict[str, str] = {} - self._abs_url_map: Dict[str, str] = {} - self.get_fallback_anchor: Optional[ - Callable[[str], Optional[str]] - ] = None # noqa: WPS234 - self._priority_patterns = None - - @property - def priority_patterns(self): - if self._priority_patterns is None: - self._priority_patterns = [ - os.path.join("/", pat) for pat in self.config.get("priority") - ] - return self._priority_patterns - - def register_anchor(self, url: str, identifier: str): - """Register that an anchor corresponding to an identifier was encountered when rendering the page. - - Arguments: - url: The relative URL of the current page. Examples: `'foo/bar/'`, `'foo/index.html'` - identifier: The HTML anchor (without '#') as a string. - """ - - new_url = os.path.join("/", f"{url}#{identifier}") - old_url = os.path.join("/", self._url_map.get(identifier, "")).split("#")[0] - - if identifier in self._url_map and not old_url == new_url: - rev_patterns = list(enumerate(self.priority_patterns))[::-1] - old_priority_idx = next( - (i for i, pat in rev_patterns if re.match(pat, old_url)), - len(rev_patterns), - ) - new_priority_idx = next( - (i for i, pat in rev_patterns if re.match(pat, new_url)), - len(rev_patterns), - ) - if new_priority_idx >= old_priority_idx: - return - if "reference" not in new_url: - raise Exception("URL WTF", new_url) - - self._url_map[identifier] = new_url - - def register_url(self, identifier: str, url: str): - """Register that the identifier should be turned into a link to this URL. - - Arguments: - identifier: The new identifier. - url: The absolute URL (including anchor, if needed) where this item can be found. - """ - self._abs_url_map[identifier] = url - - def _get_item_url( # noqa: WPS234 - self, - identifier: str, - fallback: Optional[Callable[[str], Sequence[str]]] = None, - ) -> str: - try: - return self._url_map[identifier] - except KeyError: - if identifier in self._abs_url_map: - return self._abs_url_map[identifier] - if fallback: - new_identifiers = fallback(identifier) - for new_identifier in new_identifiers: - with contextlib.suppress(KeyError): - url = self._get_item_url(new_identifier) - self._url_map[identifier] = url - return url - raise - - def get_item_url( # noqa: WPS234 - self, - identifier: str, - from_url: Optional[str] = None, - fallback: Optional[Callable[[str], Sequence[str]]] = None, - ) -> str: - """Return a site-relative URL with anchor to the identifier, if it's present anywhere. - - Arguments: - identifier: The anchor (without '#'). - from_url: The URL of the base page, from which we link towards the targeted pages. 
- fallback: An optional function to suggest alternative anchors to try on failure. - - Returns: - A site-relative URL. - """ - return self._get_item_url(identifier, fallback) - - def on_config( - self, config: Config, **kwargs - ) -> Config: # noqa: W0613,R0201 (unused arguments, cannot be static) - """Instantiate our Markdown extension. - - Hook for the [`on_config` event](https://www.mkdocs.org/user-guide/plugins/#on_config). - In this hook, we instantiate our [`AutorefsExtension`][mkdocs_autorefs.references.AutorefsExtension] - and add it to the list of Markdown extensions used by `mkdocs`. - - Arguments: - config: The MkDocs config object. - kwargs: Additional arguments passed by MkDocs. - - Returns: - The modified config. - """ - log.debug(f"{__name__}: Adding AutorefsExtension to the list") - config["markdown_extensions"].append(AutorefsExtension()) - return config - - def on_page_markdown( - self, markdown: str, page: Page, **kwargs - ) -> str: # noqa: W0613 (unused arguments) - """Remember which page is the current one. - - Arguments: - markdown: Input Markdown. - page: The related MkDocs page instance. - kwargs: Additional arguments passed by MkDocs. - - Returns: - The same Markdown. We only use this hook to map anchors to URLs. - """ - self.current_page = page.url # noqa: WPS601 - return markdown - - def on_page_content( - self, html: str, page: Page, **kwargs - ) -> str: # noqa: W0613 (unused arguments) - """Map anchors to URLs. - - Hook for the [`on_page_content` event](https://www.mkdocs.org/user-guide/plugins/#on_page_content). - In this hook, we map the IDs of every anchor found in the table of contents to the anchors absolute URLs. - This mapping will be used later to fix unresolved reference of the form `[title][identifier]` or - `[identifier][]`. - - Arguments: - html: HTML converted from Markdown. - page: The related MkDocs page instance. - kwargs: Additional arguments passed by MkDocs. - - Returns: - The same HTML. We only use this hook to map anchors to URLs. - """ - if self.scan_toc: - log.debug( - f"{__name__}: Mapping identifiers to URLs for page {page.file.src_path}" - ) - for item in page.toc.items: - self.map_urls(page, item) - return html - - def map_urls(self, page: Page, anchor: AnchorLink) -> None: - """Recurse on every anchor to map its ID to its absolute URL. - - This method populates `self.url_map` by side-effect. - - Arguments: - base_url: The base URL to use as a prefix for each anchor's relative URL. - anchor: The anchor to process and to recurse on. - """ - abs_url = os.path.join("/", page.file.url) - self.register_anchor(abs_url, anchor.id) - for child in anchor.children: - self.map_urls(page, child) - - def on_post_page( - self, output: str, page: Page, **kwargs - ) -> str: # noqa: W0613 (unused arguments) - """Fix cross-references. - - Hook for the [`on_post_page` event](https://www.mkdocs.org/user-guide/plugins/#on_post_page). - In this hook, we try to fix unresolved references of the form `[title][identifier]` or `[identifier][]`. - Doing that allows the user of `autorefs` to cross-reference objects in their documentation strings. - It uses the native Markdown syntax so it's easy to remember and use. - - We log a warning for each reference that we couldn't map to an URL, but try to be smart and ignore identifiers - that do not look legitimate (sometimes documentation can contain strings matching - our [`AUTO_REF_RE`][mkdocs_autorefs.references.AUTO_REF_RE] regular expression that did not intend to reference anything). 
- We currently ignore references when their identifier contains a space or a slash. - - Arguments: - output: HTML converted from Markdown. - page: The related MkDocs page instance. - kwargs: Additional arguments passed by MkDocs. - - Returns: - Modified HTML. - """ - log.debug(f"{__name__}: Fixing references in page {page.file.src_path}") - - url_mapper = functools.partial( - self.get_item_url, from_url=page.url, fallback=self.get_fallback_anchor - ) - fixed_output, unmapped = fix_refs(output, url_mapper) - - if unmapped and log.isEnabledFor(logging.WARNING): - for ref in unmapped: - log.warning( - f"{__name__}: {page.file.src_path}: Could not find cross-reference target '[{ref}]'", - ) - - return fixed_output diff --git a/docs/scripts/bibtex.py b/docs/scripts/bibtex.py deleted file mode 100644 index fcbb6c2944..0000000000 --- a/docs/scripts/bibtex.py +++ /dev/null @@ -1,292 +0,0 @@ -# Based on https://github.com/darwindarak/mdx_bib -import re -import string -from collections import Counter, OrderedDict -from typing import Tuple -from xml.etree import ElementTree as etree -from xml.etree.ElementTree import tostring as etree_to_string - -from markdown.extensions import Extension -from markdown.inlinepatterns import Pattern -from markdown.preprocessors import Preprocessor -from mkdocs.config.config_options import Type as MkType -from mkdocs.plugins import BasePlugin -from pybtex.database.input import bibtex -from pybtex.exceptions import PybtexError - -BRACKET_RE = re.compile(r"\[([^\[]+)\]") -CITE_RE = re.compile(r"@([\w_:-]+)") -DEF_RE = re.compile(r"\A {0,3}\[@([\w_:-]+)\]:\s*(.*)") -INDENT_RE = re.compile(r"\A\t| {4}(.*)") - -CITATION_RE = r"(\[@(?:[\w_:-]+)(?: *, *@(?:[\w_:-]+))*\])" - - -class Bibliography(object): - """Keep track of document references and citations for exporting""" - - def __init__(self, extension, plugin, bibtex_file, order): - self.extension = extension - self.order = order - self.plugin = plugin - - self.citations = OrderedDict() - self.references = dict() - - if bibtex_file: - try: - parser = bibtex.Parser() - self.bibsource = parser.parse_file(bibtex_file).entries - self.labels = { - id: self.formatCitation(self.bibsource[id]) - for id in self.bibsource.keys() - } - for value, occurrences in Counter(self.labels.values()).items(): - if occurrences > 1: - for xkey, xvalue in self.labels.items(): - i = 0 - if xvalue == value: - self.labels[ - xkey - ] = f"{xvalue}{string.ascii_lowercase[i]}" - i += 1 - - except PybtexError: - print("Error loading bibtex file") - self.bibsource = dict() - self.labels = {} - else: - self.bibsource = dict() - - def addCitation(self, citekey): - self.citations[citekey] = self.citations.get(citekey, 0) + 1 - - def setReference(self, citekey, reference): - self.references[citekey] = reference - - def citationID(self, citekey): - return "cite-" + citekey - - def referenceID(self, citekey): - return "ref-" + citekey - - def formatAuthor(self, author): - out = ( - author.last_names[0] - + ((" " + author.first_names[0][0]) if author.first_names else "") - + "." - ) - if author.middle_names: - out += f"{author.middle_names[0][0]}." 
- return out.replace("{", "").replace("}", "") - - def formatAuthorSurname(self, author): - out = author.last_names[0] - return out.replace("{", "").replace("}", "") - - def formatReference(self, ref): - author_list = list(map(self.formatAuthor, ref.persons["author"])) - - if len(author_list) == 1: - authors = author_list[0] - else: - authors = ", ".join(author_list[:-1]) - authors += f" and {author_list[-1]}" - - # Harvard style - # Surname, Initial, ... and Last_Surname, - # Initial, Year. Title. Journal, Volume(Issue), pages. doi. - - title = ref.fields["title"].replace("{", "").replace("}", "") - journal = ref.fields.get("journal", "") - volume = ref.fields.get("volume", "") - issue = ref.fields.get("issue", "") - year = ref.fields.get("year") - pages = ref.fields.get("pages") - doi = ref.fields.get("doi") - url = ref.fields.get("url") - - ref_id = self.referenceID(ref.key) - ref = f"<p id={repr(ref_id)}>{authors}, {year}. {title}." - if journal: - ref += f" <i>{journal}</i>." - if volume: - ref += f" <i>{volume}</i>" - if issue: - ref += f"({issue})" - if pages: - ref += f", pp.{pages}" - ref += "." - if doi: - ref += f' <a href="https://dx.doi.org/{doi}" target="_blank">{doi}</a>' - elif url: - ref += f' <a href="{url}" target="_blank">{url}</a>' - ref += "</p>" - - return etree.fromstring(ref) - - def formatCitation(self, ref): - author_list = list(map(self.formatAuthorSurname, ref.persons["author"])) - year = ref.fields.get("year") - - if len(author_list) == 1: - citation = f"{author_list[0]}" - elif len(author_list) == 2: - citation = f"{author_list[0]} and {author_list[1]}" - else: - citation = f"{author_list[0]} et al." - - citation += f", {year}" - - return citation - - def make_bibliography(self): - if self.order == "alphabetical": - raise (NotImplementedError) - - div = etree.Element("div") - div.set("class", "footnote") - div.append(etree.Element("hr")) - ol = etree.SubElement(div, "ol") - - if not self.citations: - return div - - # table = etree.SubElement(div, "table") - # table.set("class", "references") - # tbody = etree.SubElement(table, "tbody") - etree.SubElement(div, "div") - for id in self.citations: - li = etree.SubElement(ol, "li") - li.set("id", self.referenceID(id)) - # ref_id = etree.SubElement(li, "td") - ref_txt = etree.SubElement(li, "p") - if id in self.references: - self.extension.parser.parseChunk(ref_txt, self.references[id]) - elif id in self.bibsource: - ref_txt.append(self.formatReference(self.bibsource[id])) - else: - ref_txt.text = "Missing citation for {}".format(id) - - return div - - def clear_citations(self): - self.citations = OrderedDict() - - -class CitationsPreprocessor(Preprocessor): - """Gather reference definitions and citation keys""" - - def __init__(self, bibliography): - self.bib = bibliography - - def subsequentIndents(self, lines, i): - """Concatenate consecutive indented lines""" - linesOut = [] - while i < len(lines): - m = INDENT_RE.match(lines[i]) - if m: - linesOut.append(m.group(1)) - i += 1 - else: - break - return " ".join(linesOut), i - - def run(self, lines): - linesOut = [] - i = 0 - - while i < len(lines): - # Check to see if the line starts a reference definition - m = DEF_RE.match(lines[i]) - if m: - key = m.group(1) - reference = m.group(2) - indents, i = self.subsequentIndents(lines, i + 1) - reference += " " + indents - - self.bib.setReference(key, reference) - continue - - # Look for all @citekey patterns inside hard brackets - for bracket in BRACKET_RE.findall(lines[i]): - for c in CITE_RE.findall(bracket): - 
self.bib.addCitation(c) - linesOut.append(lines[i]) - i += 1 - - return linesOut - - -class CitationsPattern(Pattern): - """Handles converting citations keys into links""" - - def __init__(self, pattern, bibliography): - super(CitationsPattern, self).__init__(pattern) - self.bib = bibliography - - def handleMatch(self, m): - span = etree.Element("span") - for cite_match in CITE_RE.finditer(m.group(2)): - id = cite_match.group(1) - if id in self.bib.bibsource: - a = etree.Element("a") - a.set("id", self.bib.citationID(id)) - a.set("href", "./#" + self.bib.referenceID(id)) - a.set("class", "citation") - a.text = self.bib.labels[id] - span.append(a) - else: - continue - if len(span) == 0: - return None - return span - - -context_citations = None - - -class CitationsExtension(Extension): - def __init__(self): - super(CitationsExtension, self).__init__() - self.bib = None - - def extendMarkdown(self, md): - md.registerExtension(self) - self.parser = md.parser - self.md = md - - md.preprocessors.register(CitationsPreprocessor(self.bib), "mdx_bib", 15) - md.inlinePatterns.register( - CitationsPattern(CITATION_RE, self.bib), "mdx_bib", 175 - ) - - -def makeExtension(*args, **kwargs): - return CitationsExtension(*args, **kwargs) - - -class BibTexPlugin(BasePlugin): - config_scheme: Tuple[Tuple[str, MkType]] = ( - ("bibtex_file", MkType(str)), # type: ignore[assignment] - ("order", MkType(str, default="unsorted")), # type: ignore[assignment] - ) - - def __init__(self): - self.citations = None - - def on_config(self, config, **kwargs): - extension = CitationsExtension() - self.bib = Bibliography( - extension, - self, - self.config["bibtex_file"], - self.config["order"], - ) - extension.bib = self.bib - config["markdown_extensions"].append(extension) - - def on_page_content(self, html, page, config, files): - html += "\n" + etree_to_string(self.bib.make_bibliography()).decode() - self.bib.clear_citations() - return html diff --git a/docs/scripts/cards.py b/docs/scripts/cards.py deleted file mode 100644 index f0125bebb1..0000000000 --- a/docs/scripts/cards.py +++ /dev/null @@ -1,276 +0,0 @@ -""" -Adapted from pymdownx.tabbed (https://github.com/facelessuser/pymdown-extensions/) -""" -import re -import xml.etree.ElementTree as etree - -from markdown import Extension -from markdown.blockprocessors import BlockProcessor -from markdown.extensions.attr_list import AttrListTreeprocessor, get_attrs - - -def assign_attrs(elem, attrs): - """Assign `attrs` to element.""" - for k, v in get_attrs(attrs): - if k == ".": - # add to class - cls = elem.get("class") - if cls: - elem.set("class", "{} {}".format(cls, v)) - else: - elem.set("class", v) - else: - # assign attribute `k` with `v` - elem.set(AttrListTreeprocessor.NAME_RE.sub("_", k), v) - - -class CardProcessor(BlockProcessor): - """card block processor.""" - - START = re.compile(r"(?:^|\n)={3} *(card)?(?: +({:.*?}) *(?:\n|$))?") - COMPRESS_SPACES = re.compile(r" {2,}") - - def __init__(self, parser, config): - """Initialize.""" - - super().__init__(parser) - self.card_group_count = 0 - self.current_sibling = None - self.content_indention = 0 - - def detab_by_length(self, text, length): - """Remove a card from the front of each line of the given text.""" - - newtext = [] - lines = text.split("\n") - for line in lines: - if line.startswith(" " * length): - newtext.append(line[length:]) - elif not line.strip(): - newtext.append("") # pragma: no cover - else: - break - return "\n".join(newtext), "\n".join(lines[len(newtext) :]) - - def parse_content(self, 
parent, block): - """ - Get sibling card. - - Retrieve the appropriate sibling element. This can get tricky when - dealing with lists. - - """ - - old_block = block - non_cards = "" - card_set = "card-set" - - # We already acquired the block via test - if self.current_sibling is not None: - sibling = self.current_sibling - block, non_cards = self.detab_by_length(block, self.content_indent) - self.current_sibling = None - self.content_indent = 0 - return sibling, block, non_cards - - sibling = self.lastChild(parent) - - if ( - sibling is None - or sibling.tag.lower() != "div" - or sibling.attrib.get("class", "") != card_set - ): - sibling = None - else: - # If the last child is a list and the content is indented sufficient - # to be under it, then the content's is sibling is in the list. - last_child = self.lastChild(sibling) - card_content = "card-content" - child_class = ( - last_child.attrib.get("class", "") if last_child is not None else "" - ) - indent = 0 - while last_child is not None: - if ( - sibling is not None - and block.startswith(" " * self.tab_length * 2) - and last_child is not None - and ( - last_child.tag in ("ul", "ol", "dl") - or (last_child.tag == "div" and child_class == card_content) - ) - ): - # Handle nested card content - if last_child.tag == "div" and child_class == card_content: - temp_child = self.lastChild(last_child) - if temp_child is None or temp_child.tag not in ( - "ul", - "ol", - "dl", - ): - break - last_child = temp_child - - # The expectation is that we'll find an `<li>`. - # We should get it's last child as well. - sibling = self.lastChild(last_child) - last_child = ( - self.lastChild(sibling) if sibling is not None else None - ) - child_class = ( - last_child.attrib.get("class", "") - if last_child is not None - else "" - ) - - # Context has been lost at this point, so we must adjust the - # text's indentation level so it will be evaluated correctly - # under the list. 
- block = block[self.tab_length :] - indent += self.tab_length - else: - last_child = None - - if not block.startswith(" " * self.tab_length): - sibling = None - - if sibling is not None: - indent += self.tab_length - block, non_cards = self.detab_by_length(old_block, indent) - self.current_sibling = sibling - self.content_indent = indent - - return sibling, block, non_cards - - def test(self, parent, block): - """Test block.""" - - if self.START.search(block): - return True - else: - return self.parse_content(parent, block)[0] is not None - - def run(self, parent, blocks): - """Convert to card block.""" - - block = blocks.pop(0) - m = self.START.search(block) - card_set = "card-set" - - if m: - # removes the first line - if m.start() > 0: - self.parser.parseBlocks(parent, [block[: m.start()]]) - block = block[m.end() :] - sibling = self.lastChild(parent) - block, non_cards = self.detab(block) - else: - sibling, block, non_cards = self.parse_content(parent, block) - - if m: - if ( - sibling is not None - and sibling.tag.lower() == "div" - and sibling.attrib.get("class", "") == card_set - ): - card_group = sibling - else: - self.card_group_count += 1 - card_group = etree.SubElement( - parent, - "div", - { - "class": card_set, - "data-cards": "%d:0" % self.card_group_count, - }, - ) - - data = card_group.attrib["data-cards"].split(":") - card_set = int(data[0]) - card_count = int(data[1]) + 1 - - div = etree.SubElement( - card_group, - "div", - { - "class": "card-content", - }, - ) - attributes = m.group(2) - - if attributes: - attr_m = AttrListTreeprocessor.INLINE_RE.search(attributes) - if attr_m: - assign_attrs(div, attr_m.group(1)) - if div.get("href"): - div.tag = "a" - - card_group.attrib["data-cards"] = "%d:%d" % (card_set, card_count) - else: - if sibling.tag in ("li", "dd") and sibling.text: - # Sibling is a list item, but we need to wrap it's content should be - # wrapped in <p> - text = sibling.text - sibling.text = "" - p = etree.SubElement(sibling, "p") - p.text = text - div = sibling - elif sibling.tag == "div" and sibling.attrib.get("class", "") == card_set: - # Get `card-content` under `card-set` - div = self.lastChild(sibling) - else: - # Pass anything else as the parent - div = sibling - - self.parser.parseChunk(div, block) - - if non_cards: - # Insert the card content back into blocks - blocks.insert(0, non_cards) - - -class CardExtension(Extension): - """Add card extension.""" - - def __init__(self, *args, **kwargs): - """Initialize.""" - - self.config = { - "slugify": [ - 0, - "Slugify function used to create card specific IDs - Default: None", - ], - "combine_header_slug": [ - False, - "Combine the card slug with the slug of the parent header - " - "Default: False", - ], - "separator": ["-", "Slug separator - Default: '-'"], - } - - super(CardExtension, self).__init__(*args, **kwargs) - - def extendMarkdown(self, md): - """Add card to Markdown instance.""" - md.registerExtension(self) - - config = self.getConfigs() - - self.card_processor = CardProcessor(md.parser, config) - - md.parser.blockprocessors.register( - self.card_processor, - "card", - 105, - ) - - def reset(self): - """Reset.""" - - self.card_processor.card_group_count = 0 - - -def makeExtension(*args, **kwargs): - """Return extension.""" - - return CardExtension(*args, **kwargs) diff --git a/docs/scripts/clickable_snippets.py b/docs/scripts/clickable_snippets.py deleted file mode 100644 index ccb836e15c..0000000000 --- a/docs/scripts/clickable_snippets.py +++ /dev/null @@ -1,273 +0,0 @@ -# Based on 
https://github.com/darwindarak/mdx_bib -import os -from bisect import bisect_right -from collections import defaultdict -from typing import Tuple - -import jedi -import mkdocs.structure.pages -import parso -import regex -from mkdocs.config.config_options import Type as MkType -from mkdocs.config.defaults import MkDocsConfig -from mkdocs.plugins import BasePlugin - -from docs.scripts.autorefs.plugin import AutorefsPlugin - -try: - from importlib.metadata import entry_points -except ImportError: - from importlib_metadata import entry_points - - -from bs4 import BeautifulSoup - -# Used to match href in HTML to replace with a relative path -HREF_REGEX = ( - r"(?<=<\s*(?:a[^>]*href|img[^>]*src)=)" - r'(?:"([^"]*)"|\'([^\']*)|[ ]*([^ =>]*)(?![a-z]+=))' -) -# Maybe find something less specific ? -PIPE_REGEX = r"(?<![a-zA-Z0-9._-])eds[.]([a-zA-Z0-9._-]*)(?![a-zA-Z0-9._-])" - -HTML_PIPE_REGEX = r"""(?x) -(?<![a-zA-Z0-9._-]) -<span[^>]*>eds<\/span> -<span[^>]*>[.]<\/span> -<span[^>]*>([a-zA-Z0-9._-]*)<\/span> -(?![a-zA-Z0-9._-]) -""" - -REGISTRY_REGEX = r"""(?x) -(?<![a-zA-Z0-9._-]) -<span[^>]*>(?:"|&\#39;|")@([a-zA-Z0-9._-]*)(?:"|&\#39;|")<\/span>\s* -<span[^>]*>:<\/span>\s* -<span[^>]*>\s*<\/span>\s* -<span[^>]*>(?:"|&\#39;|")?([a-zA-Z0-9._-]*)(?:"|&\#39;|")?<\/span> -(?![a-zA-Z0-9._-]) -""" - -CITATION_RE = r"(\[@(?:[\w_:-]+)(?: *, *@(?:[\w_:-]+))*\])" - - -class ClickableSnippetsPlugin(BasePlugin): - config_scheme: Tuple[Tuple[str, MkType]] = () - - @mkdocs.plugins.event_priority(1000) - def on_config(self, config: MkDocsConfig): - for event_name, events in config.plugins.events.items(): - for event in list(events): - if "autorefs" in str(event): - events.remove(event) - old_plugin = config["plugins"]["autorefs"] - plugin_config = dict(old_plugin.config) - plugin = AutorefsPlugin() - config.plugins["autorefs"] = plugin - config["plugins"]["autorefs"] = plugin - plugin.load_config(plugin_config) - - @classmethod - def get_ep_namespace(cls, ep, namespace=None): - if hasattr(ep, "select"): - return ep.select(group=namespace) if namespace else list(ep._all) - else: # dict - return ( - ep.get(namespace, []) - if namespace - else (x for g in ep.values() for x in g) - ) - - @mkdocs.plugins.event_priority(-1000) - def on_post_page( - self, - output: str, - page: mkdocs.structure.pages.Page, - config: mkdocs.config.Config, - ): - """ - 1. Replace absolute paths with path relative to the rendered page - This must be performed after all other plugins have run. - 2. Replace component names with links to the component reference - - Parameters - ---------- - output - page - config - - Returns - ------- - - """ - - autorefs: AutorefsPlugin = config["plugins"]["autorefs"] - ep = entry_points() - page_url = os.path.join("/", page.file.url) - factories_entry_points = { - ep.name: ep.value - for ep in ( - *self.get_ep_namespace(ep, "spacy_factories"), - *self.get_ep_namespace(ep, "edsnlp_factories"), - *self.get_ep_namespace(ep, "spacy_scorers"), - ) - } - all_entry_points = defaultdict(dict) - for ep in self.get_ep_namespace(ep): - if ep.group.startswith("edsnlp_") or ep.group.startswith("spacy_"): - group = ep.group.split("_", 1)[1] - all_entry_points[group][ep.name] = ep.value - - # This method is meant for replacing any component that - # appears in a "eds.component" format, no matter if it is - # preceded by a "@factory" or not. - def replace_factory_component(match): - full_match = match.group(0) - name = "eds." 
+ match.group(1) - ep = factories_entry_points.get(name) - preceding = output[match.start(0) - 50 : match.start(0)] - if ep is not None and "DEFAULT:" not in preceding: - try: - url = autorefs.get_item_url(ep.replace(":", ".")) - except KeyError: - pass - else: - return f"<a href={url}>{name}</a>" - return full_match - - # This method is meant for replacing any component that - # appears in a "@registry": "component" format - def replace_any_registry_component(match): - full_match = match.group(0) - group = match.group(1) - name = match.group(2) - ep = all_entry_points[group].get(name) - preceding = output[match.start(0) - 50 : match.start(0)] - if ep is not None and "DEFAULT:" not in preceding: - try: - url = autorefs.get_item_url(ep.replace(":", ".")) - except KeyError: - pass - else: - repl = f'<a href={url} class="discrete-link">{name}</a>' - before = full_match[: match.start(2) - match.start(0)] - after = full_match[match.end(2) - match.start(0) :] - return before + repl + after - return full_match - - def replace_link(match): - relative_url = url = match.group(1) or match.group(2) or match.group(3) - if url.startswith("/"): - relative_url = os.path.relpath(url, page_url) - return f'"{relative_url}"' - - output = regex.sub(PIPE_REGEX, replace_factory_component, output) - output = regex.sub(HTML_PIPE_REGEX, replace_factory_component, output) - output = regex.sub(REGISTRY_REGEX, replace_any_registry_component, output) - - all_snippets = "" - all_offsets = [] - all_nodes = [] - - soups = [] - - # Replace absolute paths with path relative to the rendered page - for match in regex.finditer("<code>.*?</code>", output, flags=regex.DOTALL): - node = match.group(0) - if "\n" in node: - soup, snippet, python_offsets, html_nodes = self.convert_html_to_code( - node - ) - size = len(all_snippets) - all_snippets += snippet + "\n" - all_offsets.extend([size + i for i in python_offsets]) - all_nodes.extend(html_nodes) - soups.append((soup, match.start(0), match.end(0))) - - interpreter = jedi.Interpreter(all_snippets, [{}]) - line_lengths = [0] - for line in all_snippets.split("\n"): - line_lengths.append(len(line) + line_lengths[-1] + 1) - line_lengths[-1] -= 1 - - for name in self.iter_names(interpreter._module_node): - try: - line, col = name.start_pos - offset = line_lengths[line - 1] + col - node_idx = bisect_right(all_offsets, offset) - 1 - - node = all_nodes[node_idx] - gotos = interpreter.goto(line, col, follow_imports=True) - gotos = [ - goto - for goto in gotos - if ( - goto - and goto.full_name - and goto.full_name.startswith("edsnlp") - and goto.type != "module" - ) - ] - goto = gotos[0] if gotos else None - if goto: - url = autorefs.get_item_url(goto.full_name) - # Check if node has no link in its upstream ancestors - if not node.find_parents("a"): - node.replace_with( - BeautifulSoup( - f'<a class="discrete-link" href="{url}">{node}</a>', - "html5lib", - ) - ) - except Exception: - pass - - # Re-insert soups into the output - for soup, start, end in reversed(soups): - output = output[:start] + str(soup.find("code")) + output[end:] - - output = regex.sub(HREF_REGEX, replace_link, output) - - return output - - @classmethod - def iter_names(cls, root): - if isinstance(root, parso.python.tree.Name): - yield root - for child in getattr(root, "children", ()): - yield from cls.iter_names(child) - - @classmethod - def convert_html_to_code( - cls, html_content: str - ) -> Tuple[BeautifulSoup, str, list, list]: - pre_html_content = "<pre>" + html_content + "</pre>" - soup = 
-        code_element = soup.find("code")
-
-        line_lengths = [0]
-        for line in pre_html_content.split("\n"):
-            line_lengths.append(len(line) + line_lengths[-1] + 1)
-        line_lengths[-1] -= 1
-
-        python_code = ""
-        code_offsets = []
-        html_nodes = []
-        code_offset = 0
-
-        def extract_text_with_offsets(el):
-            nonlocal python_code, code_offset
-            for content in el.contents:
-                # check not class md-annotation
-                # Recursively process child elements
-                if isinstance(content, str):
-                    python_code += content
-                    code_offsets.append(code_offset)
-                    code_offset += len(content)
-                    html_nodes.append(content)
-                    continue
-                if "md-annotation" not in content.get("class", ""):
-                    extract_text_with_offsets(content)
-
-        extract_text_with_offsets(code_element)
-
-        return soup, python_code, code_offsets, html_nodes
diff --git a/docs/scripts/griffe_ext.py b/docs/scripts/griffe_ext.py
index deb9d1bdf6..1874cab08b 100644
--- a/docs/scripts/griffe_ext.py
+++ b/docs/scripts/griffe_ext.py
@@ -3,13 +3,17 @@
 import inspect
 import logging
 import sys
-from typing import Union
+from typing import Any, Union
 
 import astunparse
-from griffe import Extension, Object, ObjectNode
-from griffe.docstrings.dataclasses import DocstringSectionParameters
-from griffe.expressions import Expr
-from griffe.logger import patch_loggers
+from griffe import (
+    DocstringSectionParameters,
+    Expr,
+    Extension,
+    Object,
+    ObjectNode,
+    patch_loggers,
+)
 
 
 def get_logger(name):
@@ -31,7 +35,12 @@ def __init__(self):
         self.FACT_MEM = {}
         self.PIPE_TO_FACT = {}
 
-    def on_instance(self, node: Union[ast.AST, ObjectNode], obj: Object) -> None:
+    def on_instance(
+        self,
+        node: Union[ast.AST, ObjectNode],
+        obj: Object,
+        agent: Any = None,
+    ) -> None:
         if (
             isinstance(node, ast.Assign)
             and obj.name == "create_component"
diff --git a/docs/scripts/plugin.py b/docs/scripts/plugin.py
deleted file mode 100644
index 60f2212b6f..0000000000
--- a/docs/scripts/plugin.py
+++ /dev/null
@@ -1,167 +0,0 @@
-import os
-from pathlib import Path
-
-import jedi
-import mkdocs.config
-import mkdocs.plugins
-import mkdocs.structure
-import mkdocs.structure.files
-import mkdocs.structure.nav
-import mkdocs.structure.pages
-from bs4 import BeautifulSoup
-
-
-def exclude_file(name):
-    return name.startswith("assets/fragments/")
-
-
-# Add the files from the project root
-
-VIRTUAL_FILES = {}
-REFERENCE_TEMPLATE = """
-# `{ident}`
-::: {ident}
-    options:
-        show_source: false
-"""
-
-
-def on_files(files: mkdocs.structure.files.Files, config: mkdocs.config.Config):
-    """
-    Recursively the navigation of the mkdocs config
-    and recursively content of directories of page that point
-    to directories.
-
-    Parameters
-    ----------
-    config: mkdocs.config.Config
-        The configuration object
-    kwargs: dict
-        Additional arguments
-    """
-
-    root = Path("edsnlp")
-    reference_nav = []
-    for path in sorted(root.rglob("*.py")):
-        module_path = path.relative_to(root.parent).with_suffix("")
-        doc_path = Path("reference") / path.relative_to(root.parent).with_suffix(".md")
-        # full_doc_path = Path("docs/reference/") / doc_path
-        parts = list(module_path.parts)
-        current = reference_nav
-        for part in parts[:-1]:
-            sub = next((item[part] for item in current if part in item), None)
-            if sub is None:
-                current.append({part: []})
-                sub = current[-1][part]
-            current = sub
-        if parts[-1] == "__init__":
-            parts = parts[:-1]
-            doc_path = doc_path.with_name("index.md")
-            current.append({"index.md": str(doc_path)})
-        elif parts[-1] == "__main__":
-            continue
-        else:
-            current.append({parts[-1]: str(doc_path)})
-        ident = ".".join(parts)
-        os.makedirs(doc_path.parent, exist_ok=True)
-        VIRTUAL_FILES[str(doc_path)] = REFERENCE_TEMPLATE.format(ident=ident)
-
-    for item in config["nav"]:
-        if not isinstance(item, dict):
-            continue
-        key = next(iter(item.keys()))
-        if not isinstance(item[key], str):
-            continue
-        if item[key].strip("/") == "reference":
-            item[key] = reference_nav
-
-    VIRTUAL_FILES["contributing.md"] = Path("contributing.md").read_text()
-    VIRTUAL_FILES["changelog.md"] = Path("changelog.md").read_text()
-
-    return mkdocs.structure.files.Files(
-        [file for file in files if not exclude_file(file.src_path)]
-        + [
-            mkdocs.structure.files.File(
-                file,
-                config["docs_dir"],
-                config["site_dir"],
-                config["use_directory_urls"],
-            )
-            for file in VIRTUAL_FILES
-        ]
-    )
-
-
-def on_nav(nav, config, files):
-    def rec(node):
-        if isinstance(node, list):
-            return [rec(item) for item in node]
-        if node.is_section and node.title == "Code Reference":
-            return
-        if isinstance(node, mkdocs.structure.nav.Navigation):
-            return rec(node.items)
-        if isinstance(node, mkdocs.structure.nav.Section):
-            if (
-                len(node.children)
-                and node.children[0].is_page
-                and node.children[0].is_index
-            ):
-                first = node.children[0]
-                link = mkdocs.structure.nav.Link(
-                    title=first.title,
-                    url=first.url,
-                )
-                link.is_index = True
-                first.title = "Overview"
-                node.children.insert(0, link)
-            return rec(node.children)
-
-    rec(nav.items)
-
-
-def on_page_read_source(page, config):
-    if page.file.src_path in VIRTUAL_FILES:
-        return VIRTUAL_FILES[page.file.src_path]
-    return None
-
-
-# Get current git commit
-GIT_COMMIT = os.popen("git rev-parse --short HEAD").read().strip()
-
-
-@mkdocs.plugins.event_priority(-2000)
-def on_post_page(
-    output: str,
-    page: mkdocs.structure.pages.Page,
-    config: mkdocs.config.Config,
-):
-    """
-    Add github links to the html output
-    """
-    # Find all the headings (h1, h2, ...) whose id starts with "edsnlp"
-    soup = BeautifulSoup(output, "html.parser")
-    for heading in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
-        ref = heading.get("id", "")
-        if ref.startswith("edsnlp.") and "--" not in ref:
-            code = "import edsnlp; " + ref
-            interpreter = jedi.Interpreter(code, namespaces=[{}])
-            goto = interpreter.infer(1, len(code))
-            try:
-                file = goto[0].module_path.relative_to(Path.cwd())
-            except Exception:
-                goto = []
-            if not goto:
-                continue
-            line = goto[0].line
-            # Add a "[source]" span with a link to the source code in a new tab
-            url = f"https://github.com/aphp/edsnlp/blob/{GIT_COMMIT}/{file}#L{line}"
-            heading.append(
-                BeautifulSoup(
-                    f'<span class="sourced-heading-spacer"></span>'
-                    f'<a href="{url}" target="_blank">[source]</a>',
-                    features="html.parser",
-                )
-            )
-            # add "sourced-heading" to heading class
-            heading["class"] = heading.get("class", []) + ["sourced-heading"]
-    return str(soup)
diff --git a/docs/training/loggers.md b/docs/training/loggers.md
index 1b70b92472..cc06c6de59 100644
--- a/docs/training/loggers.md
+++ b/docs/training/loggers.md
@@ -22,7 +22,7 @@ You can configure loggers in `edsnlp.train` via the `logger` parameter of the `t
 train:
   ...
   logger:
-    "@loggers": csv
+    "@loggers": csv !draft
   ...
 ```
 
@@ -50,13 +50,13 @@ You can configure loggers in `edsnlp.train` via the `logger` parameter of the `t
 !!! note "Draft objects"
 
-    `edsnlp.train` can provide a default project name and logging dir for loggers that require these parameters. For these loggers, if you don't want to set the project name yourself, you can either:
+    `edsnlp.train` can provide a default `project_name` and `logging_dir` for loggers that require these parameters. For these loggers, if you don't want to set the project name yourself, you can either:
 
     - call `CSVLogger.draft(...)` with the normal init parameters minus the `project_name` or `logging_dir` parameters,
-      which will cause a `Draft[CSVLogger]` object to be returned if some required parameters are missing
-    - or use `"@loggers": csv !draft` in the config file, which will also cause a `Draft[CSVLogger]` object to be returned if some required
-      parameters are missing
-    - use the shorthand `logger: ["csv", "tensorboard", ...]`, which will use the default project name and logging dir
+      which will cause a `Draft[CSVLogger]` object to be returned, which will be instantiated later when the required parameters
+      are available
+    - or use `"@loggers": csv !draft` in the config file, which is the config-file equivalent of the `.draft()` method above
+    - use the string shorthands `logger: ["csv", "tensorboard", ...]`, which will use the default project name and logging dir
 
 The supported loggers are listed below.
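Editor's note: the note above describes two equivalent ways of deferring `project_name` / `logging_dir` to `edsnlp.train`. A minimal sketch of what this looks like in practice, assuming `CSVLogger` is importable from `edsnlp.training.loggers` (the import path is not shown in this diff, so treat it as an assumption):

```python
# Hypothetical import path -- the diff only references the CSVLogger class name.
from edsnlp.training.loggers import CSVLogger

# Omitting project_name / logging_dir returns a Draft[CSVLogger] instead of a
# fully built logger; edsnlp.train is expected to fill in its defaults later.
logger = CSVLogger.draft()

# Config-file equivalent of the .draft() call above:
#   logger:
#       "@loggers": csv !draft
#
# String shorthand that also falls back to the default project name / logging dir:
#   logger: ["csv", "tensorboard"]
```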
diff --git a/docs/tutorials/training-ner.md b/docs/tutorials/training-ner.md
index ddad45a7b0..c3c151e823 100644
--- a/docs/tutorials/training-ner.md
+++ b/docs/tutorials/training-ner.md
@@ -165,7 +165,7 @@ Visit the [`edsnlp.train` documentation][edsnlp.training.trainer.train] for a li
     span_setter: 'gold_spans'
 
   loggers:
-    - '@loggers': csv
+    - '@loggers': csv !draft
     - '@loggers': rich
       fields:
         step: {}
diff --git a/mkdocs.yml b/mkdocs.yml
index 8a53f195e3..1271f334f5 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -3,7 +3,7 @@ site_name: EDS-NLP
 repo_url: https://github.com/aphp/edsnlp
 repo_name: aphp/edsnlp
 
-# copyright: Copyright © 2022 – Assistance Publique - Hôpitaux de Paris
+copyright: Copyright © 2025 – Assistance Publique - Hôpitaux de Paris
 
 # extra:
 #   social:
@@ -145,13 +145,12 @@ nav:
       - data/spark.md
       - data/converters.md
   - Training:
+      - training/training-api.md
       - training/loggers.md
   - Concepts:
       - concepts/pipeline.md
       - concepts/torch-component.md
       - concepts/inference.md
-  - Training:
-      - training/training-api.md
   - Metrics:
       - metrics/index.md
       - metrics/ner.md
@@ -180,30 +179,27 @@ extra:
 extra_css:
   - assets/stylesheets/extra.css
-  - assets/stylesheets/cards.css
-  #- assets/termynal/termynal.css
-
-extra_javascript:
-  #- https://cdn.jsdelivr.net/npm/vega@5
-  #- https://cdn.jsdelivr.net/npm/vega-lite@5
-  #- https://cdn.jsdelivr.net/npm/vega-embed@6
-  - https://polyfill.io/v3/polyfill.min.js?features=es6
-  # - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js
 
 watch:
   - contributing.md
   - changelog.md
   - edsnlp
 
-hooks:
-  - docs/scripts/plugin.py
-
 plugins:
+  - auto_gen_pages:
+      package_dirs: ["pret"]
+      reference_section: Reference
+      exclude_glob: assets/fragments/*
+      copy_files:
+        changelog.md: changelog.md
+        contributing.md: contributing.md
+  - fix_fonts:
+  - cards:
   - redirects:
       redirect_maps:
         'pipes/trainable/span-qualifier.md': 'pipes/trainable/span-classifier.md'
         'tutorials/training.md': 'training/training-api.md'
-  - search
+  - search:
   - minify:
      minify_html: true
      minify_js: true
@@ -213,11 +209,12 @@ plugins:
      cache_safe: true
   - autorefs:
      priority:
-        - .*
+        - '*'
        - reference
+  - pret_snippet_renderer:
+  - mkdocstrings_options_templates:
   - mkdocstrings:
      enable_inventory: true
-     custom_templates: docs/assets/templates
      handlers:
        python:
          options:
@@ -230,9 +227,13 @@ plugins:
            show_root_toc_entry: false
            show_signature: false
            merge_init_into_class: true
+           inventories:
+             - https://aphp.github.io/confit/latest/objects.inv
+             - https://percevalw.github.io/metanno/latest/objects.inv
+             - https://percevalw.github.io/pret/latest/objects.inv
   - bibtex:
      bibtex_file: "docs/references.bib"
-  - clickable_snippets:
+  - clickable_code:

 # Just uncomment the following lines to enable i18n
 # and start creating .fr.md and .en.md files.
 # - i18n:
@@ -267,7 +268,6 @@ markdown_extensions:
   - pymdownx.emoji:
      emoji_index: !!python/name:materialx.emoji.twemoji
      emoji_generator: !!python/name:materialx.emoji.to_svg
-  - docs.scripts.cards
 
 validation:
   absolute_links: ignore
diff --git a/pyproject.toml b/pyproject.toml
index f64ac98ae9..2c8ddc6520 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -64,23 +64,8 @@ dev-no-ml = [
     "edsnlp[docs-no-ml]",
 ]
 docs-no-ml = [
-    "mike~=1.1.2",
-    "mkdocs-charts-plugin==0.0.8",
-    "mkdocs-img2fig-plugin==0.9.3",
-    "mkdocs-material~=9.2.0",
-    "mkdocs-section-index==0.3.4",
-    "mkdocs~=1.5.2",
-    "mkdocstrings>=0.20,<0.28.0",
-    "mkdocstrings-python~=1.1",
-    "mkdocs-minify-plugin",
-    "mkdocs-redirects>=1.2.1;python_version>='3.8'",
-    "markdown-grid-tables==0.4.0",
-    "pybtex~=0.24.0",
-    "pathspec>=0.11.1",  # required by vendored mkdocs-autorefs PR
-    "astunparse",
-    "griffe<0.39",
-    "jedi",
-    "html5lib",
+    "mkdocs-eds @ git+https://github.com/percevalw/mkdocs-eds.git@main#egg=mkdocs-eds ; python_version>='3.9'",
+    "markdown-grid-tables==0.4.0; python_version>='3.9'",
 ]
 ml = [
     "rich-logger>=0.3.1",
@@ -333,11 +318,6 @@ where = ["."]
 [project.entry-points."spacy_tokenizers"]
 "eds.tokenizer" = "edsnlp.language:create_eds_tokenizer"
 
-[project.entry-points."mkdocs.plugins"]
-"bibtex" = "docs.scripts.bibtex:BibTexPlugin"
-"autorefs" = "docs.scripts.autorefs.plugin:AutorefsPlugin"
-"clickable_snippets" = "docs.scripts.clickable_snippets:ClickableSnippetsPlugin"
-
 [build-system]
 requires = [
     "setuptools",
diff --git a/tests/extract_docs_code.py b/tests/extract_docs_code.py
index 55d499ea14..26171829eb 100644
--- a/tests/extract_docs_code.py
+++ b/tests/extract_docs_code.py
@@ -115,8 +115,6 @@ def on_config(self, config: MkDocsConfig):
         # After pymdownx.highlight, because of weird registering deleting the first
         # extension
         config["markdown_extensions"].append(self.ext)
-        config["markdown_extensions"].remove("pymdownx.highlight")
-        config["markdown_extensions"].remove("fenced_code")
 
     def on_pre_build(self, *, config: MkDocsConfig):
         mkdocstrings_plugin: MkdocstringsPlugin = config.plugins["mkdocstrings"]
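Editor's note: since the local `[project.entry-points."mkdocs.plugins"]` table is dropped in favour of the external `mkdocs-eds` package, a quick sanity check after installing the docs extras is to list the `mkdocs.plugins` entry points and confirm the renamed plugins referenced in mkdocs.yml (`clickable_code`, `auto_gen_pages`, ...) are discoverable. A small sketch using only the standard library; the assumption here is that `mkdocs-eds` registers its plugins under those names:

```python
from importlib.metadata import entry_points

# Collect every plugin MkDocs can discover. On Python < 3.10, entry_points()
# returns a dict-like mapping instead of an object with .select(), hence the fallback.
eps = entry_points()
plugins = (
    eps.select(group="mkdocs.plugins")
    if hasattr(eps, "select")
    else eps.get("mkdocs.plugins", [])
)

# Names such as "clickable_code" or "auto_gen_pages" should appear here once the
# docs extras (and therefore mkdocs-eds) are installed.
print(sorted({ep.name for ep in plugins}))
```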