diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index a2b7339417..69f7f9a3d0 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -98,4 +98,4 @@ jobs:
       - name: Build documentation
         run: |
           git fetch origin gh-pages
-          mike deploy --push --no-redirect --update-aliases $GITHUB_REF_NAME latest
+          mike deploy --push --alias-type=copy --update-aliases $GITHUB_REF_NAME latest
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 6b4066b82d..ff6662d46c 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -128,7 +128,7 @@ jobs:
 
       - name: Build documentation
         run: |
-          mike deploy --no-redirect --rebase --update-aliases $BRANCH_NAME latest
+          mike deploy --alias-type=copy --update-aliases $BRANCH_NAME latest
           mike set-default $BRANCH_NAME
 
       - name: Put content of gh-pages to public folder
diff --git a/docs/assets/overrides/main.html b/docs/assets/overrides/main.html
index c921ab3f7b..586ec84ab6 100644
--- a/docs/assets/overrides/main.html
+++ b/docs/assets/overrides/main.html
@@ -1,5 +1,12 @@
 {% extends "base.html" %}
 
+{% block scripts %}
+
+  {{ super() }}
+
+{% endblock %}
+
+
 {% block announce %}
   Check out the new span classifier training tutorial !
 {% endblock %}
diff --git a/docs/assets/stylesheets/cards.css b/docs/assets/stylesheets/cards.css
deleted file mode 100644
index 2a66d0bef7..0000000000
--- a/docs/assets/stylesheets/cards.css
+++ /dev/null
@@ -1,46 +0,0 @@
-.md-typeset .card-set {
-    grid-gap: .4rem;
-    display: grid;
-    grid-template-columns: repeat(auto-fit,minmax(16rem,1fr));
-    margin: 1em 0
-}
-
-.md-typeset .card-set>a.card-content {
-    color: initial;
-}
-
-.md-typeset .card-set>.card-content,.md-typeset .card-set>.card-content,.md-typeset .grid>.card {
-    border: .05rem solid var(--md-default-fg-color--lightest);
-    border-radius: .1rem;
-    display: block;
-    margin: 0;
-    padding: .8rem;
-    transition: border .25s,box-shadow .25s
-}
-
-.md-typeset .card-set>.card-content:focus-within,.md-typeset .card-set>.card-content:hover,.md-typeset .card-set>.card-content:focus-within,.md-typeset .card-set>.card-content:hover,.md-typeset .grid>.card:focus-within,.md-typeset .grid>.card:hover {
-    border-color: #0000;
-    box-shadow: var(--md-shadow-z2)
-}
-
-.md-typeset .card-set>.card-content>hr,.md-typeset .card-set>.card-content>hr,.md-typeset .grid>.card>hr {
-    margin-bottom: 1em;
-    margin-top: 1em
-}
-
-.md-typeset .card-set>.card-content>:first-child,.md-typeset .card-set>.card-content>:first-child,.md-typeset .grid>.card>:first-child {
-    margin-top: 0
-}
-
-.md-typeset .card-set>.card-content>:last-child,.md-typeset .card-set>.card-content>:last-child,.md-typeset .grid>.card>:last-child {
-    margin-bottom: 0
-}
-
-.md-typeset .card-set>*,.md-typeset .card-set>.admonition,.md-typeset .card-set>.highlight>*,.md-typeset .card-set>.highlighttable,.md-typeset .card-set>.md-typeset details,.md-typeset .card-set>details,.md-typeset .card-set>pre {
-    margin-bottom: 0;
-    margin-top: 0
-}
-
-.md-typeset .card-set>.highlight>pre:only-child,.md-typeset .card-set>.highlight>pre>code,.md-typeset .card-set>.highlighttable,.md-typeset .card-set>.highlighttable>tbody,.md-typeset .card-set>.highlighttable>tbody>tr,.md-typeset .card-set>.highlighttable>tbody>tr>.code,.md-typeset .card-set>.highlighttable>tbody>tr>.code>.highlight,.md-typeset .card-set>.highlighttable>tbody>tr>.code>.highlight>pre,.md-typeset .card-set>.highlighttable>tbody>tr>.code>.highlight>pre>code {
-    height: 100%
-}
diff --git a/docs/assets/stylesheets/extra.css b/docs/assets/stylesheets/extra.css
index 90719615e9..30ebe620b4 100644
--- a/docs/assets/stylesheets/extra.css
+++ b/docs/assets/stylesheets/extra.css
@@ -53,6 +53,7 @@
 }
 
 html {
+    font-size: 100% !important;
 }
 
 body, input {
@@ -65,7 +66,7 @@ body, input {
     word-wrap: normal;
     color: var(--md-typeset-color);
     font-weight: 600;
-    scroll-margin-top: 1.25rem;
+    scroll-margin-top: 1.5625rem;
     letter-spacing: 0;
 }
@@ -74,26 +75,17 @@ body, input {
 }
 
 .md-nav {
-    font-size: 0.8rem;
+    font-size: 1rem;
 }
 
 .md-typeset code {
     font-size: 0.95em;
 }
 
-.md-typeset pre > code, .termy > [data-termynal], .highlighttable .linenos {
-    font-size: .75rem;
-}
-
-.termy > [data-termynal] {
-    font-size: 0.8rem;
-    font-family: var(--md-code-font);
-    padding: 45px 45px 25px;
+.md-typeset pre > code, .highlighttable .linenos {
+    font-size: .9375rem;
 }
 
-.termy > [data-termynal] {
-
-}
 
 .md-typeset :is(.admonition,details) {
     font-size: inherit !important;
@@ -125,21 +117,21 @@ body, input {
 
 @media screen and (min-width: 76.1875em) {
     .md-sidebar {
-        margin-top: 1.5rem;
+        margin-top: 1.875rem;
     }
 }
 
 @media screen and (min-width: 60em) {
     .md-nav--secondary .md-nav__title {
         background: var(--md-main-bg) !important;
-        box-shadow: 0 0 0.4rem 0.4rem var(--md-main-bg) !important;
+        box-shadow: 0 0 0.5rem 0.5rem var(--md-main-bg) !important;
     }
 }
 
 @media screen and (min-width: 76.25em) {
     .md-nav--primary .md-nav__title, .md-nav--secondary .md-nav__title, .md-nav--lifted > .md-nav__list > .md-nav__item--active > .md-nav__link {
         background: var(--md-main-bg) !important;
-        box-shadow: 0 0 0.4rem 0.4rem var(--md-main-bg) !important;
+        box-shadow: 0 0 0.5rem 0.5rem var(--md-main-bg) !important;
     }
 }
@@ -152,7 +144,7 @@ body, input {
 }
 
 .md-content__inner {
-    margin-top: 1.5rem;
+    margin-top: 1.875rem;
 }
 
 .doc td > code {
@@ -160,7 +152,7 @@ body, input {
 }
 
 .md-typeset .card-set {
-    grid-template-columns: repeat(auto-fit,minmax(12rem,1fr)) !important;
+    grid-template-columns: repeat(auto-fit,minmax(15rem,1fr)) !important;
 }
 
 .md-typeset code a:not(.md-annotation__index) {
@@ -187,26 +179,8 @@ a.discrete-link {
 }
 
 .sourced-heading > a {
-    font-size: 1rem;
+    font-size: 1.25rem;
     align-content: center;
-    white-space: nowrap;
-}
-
-.doc-param-details .subdoc {
-    padding: 0;
-    box-shadow: none;
-    border-color: var(--md-typeset-table-color);
-}
-
-.doc-param-details .subdoc > div > div > div> table {
-    padding: 0;
-    box-shadow: none;
-    border: none;
-}
-
-.doc-param-details .subdoc > summary {
-    margin: 0;
-    font-weight: normal;
 }
 
 /*.chip {
@@ -306,3 +280,11 @@ a.discrete-link {
     color: #bababa;
     white-space: nowrap;
 }
+
+.md-content {
+    contain: paint;
+}
+
+.hidden {
+    display: none !important;
+}
diff --git a/docs/assets/templates/python/material/class.html b/docs/assets/templates/python/material/class.html
deleted file mode 100644
index ab982e5aea..0000000000
--- a/docs/assets/templates/python/material/class.html
+++ /dev/null
@@ -1,127 +0,0 @@
-
{% if show_full_path %}{{ class.path }}{% else %}{{ class.name }}{% endif %}
- {% endif %}
-
- {% with labels = class.labels %}
- {% include "labels.html" with context %}
- {% endwith %}
-
- {% endfilter %}
-
- {% if config.separate_signature and config.merge_init_into_class %}
- {% if "__init__" in class.members %}
- {% with function = class.members["__init__"] %}
- {% filter format_signature(function, config.line_length, crossrefs=config.signature_crossrefs) %}
- {% if show_full_path %}{{ class.path }}{% else %}{{ class.name }}{% endif %}
- {% endfilter %}
- {% endwith %}
- {% endif %}
- {% endif %}
-
- {% else %}
- {% if config.show_root_toc_entry %}
- {% filter heading(heading_level,
- role="class",
- id=html_id,
- toc_label=class.path if config.show_root_full_path else class.name,
- hidden=True) %}
- {% endfilter %}
- {% endif %}
- {% set heading_level = heading_level - 1 %}
- {% endif %}
-
-
- Bases: {% for expression in class.bases -%}
- {% include "expression.html" with context %}{% if not loop.last %}, {% endif %}
- {% endfor -%}
-
{{ class.relative_filepath }}{{ class.relative_filepath }}{{ section.title or "Parameters:" }}
-| Name | -Type | -Description | -Default | -
|---|---|---|---|
{{ parameter.name }} |
-
- {% if parameter.annotation %}
- {% with expression = parameter.annotation %}
- {% include "expression.html" with context %}
- {% endwith %}
- {% endif %}
- |
- {{ parameter.description|convert_markdown(heading_level, html_id) }} | -
- {% if parameter.default %}
- {% with expression = parameter.default %}
- {% include "expression.html" with context %}
- {% endwith %}
- {% else %}
- required
- {% endif %}
- |
-
{{ section.title or "Parameters:" }}
-{% include "expression.html" with context %})
- {% endwith %}
- {% endif %}
- – {{ parameter.description|convert_markdown(heading_level, html_id) }}
- | {{ (section.title or "PARAMETER").rstrip(":").upper() }} | -DESCRIPTION | -
|---|---|
{{ parameter.name }} |
-
- {{ parameter.description|convert_markdown(heading_level, html_id) }}
-
- {% if parameter.annotation %}
-
- TYPE:
- {% with expression = parameter.annotation %}
- |
-
{{ function.relative_filepath }}
{authors}, {year}. {title}."
-        if journal:
-            ref += f" {journal}."
-        if volume:
-            ref += f" {volume}"
-        if issue:
-            ref += f"({issue})"
-        if pages:
-            ref += f", pp.{pages}"
-        ref += "."
-        if doi:
-            ref += f' {doi}'
-        elif url:
-            ref += f' {url}'
-        ref += "
"
-
-        return etree.fromstring(ref)
-
-    def formatCitation(self, ref):
-        author_list = list(map(self.formatAuthorSurname, ref.persons["author"]))
-        year = ref.fields.get("year")
-
-        if len(author_list) == 1:
-            citation = f"{author_list[0]}"
-        elif len(author_list) == 2:
-            citation = f"{author_list[0]} and {author_list[1]}"
-        else:
-            citation = f"{author_list[0]} et al."
-
-        citation += f", {year}"
-
-        return citation
-
-    def make_bibliography(self):
-        if self.order == "alphabetical":
-            raise (NotImplementedError)
-
-        div = etree.Element("div")
-        div.set("class", "footnote")
-        div.append(etree.Element("hr"))
-        ol = etree.SubElement(div, "ol")
-
-        if not self.citations:
-            return div
-
-        # table = etree.SubElement(div, "table")
-        # table.set("class", "references")
-        # tbody = etree.SubElement(table, "tbody")
-        etree.SubElement(div, "div")
-        for id in self.citations:
-            li = etree.SubElement(ol, "li")
-            li.set("id", self.referenceID(id))
-            # ref_id = etree.SubElement(li, "td")
-            ref_txt = etree.SubElement(li, "p")
-            if id in self.references:
-                self.extension.parser.parseChunk(ref_txt, self.references[id])
-            elif id in self.bibsource:
-                ref_txt.append(self.formatReference(self.bibsource[id]))
-            else:
-                ref_txt.text = "Missing citation for {}".format(id)
-
-        return div
-
-    def clear_citations(self):
-        self.citations = OrderedDict()
-
-
-class CitationsPreprocessor(Preprocessor):
-    """Gather reference definitions and citation keys"""
-
-    def __init__(self, bibliography):
-        self.bib = bibliography
-
-    def subsequentIndents(self, lines, i):
-        """Concatenate consecutive indented lines"""
-        linesOut = []
-        while i < len(lines):
-            m = INDENT_RE.match(lines[i])
-            if m:
-                linesOut.append(m.group(1))
-                i += 1
-            else:
-                break
-        return " ".join(linesOut), i
-
-    def run(self, lines):
-        linesOut = []
-        i = 0
-
-        while i < len(lines):
-            # Check to see if the line starts a reference definition
-            m = DEF_RE.match(lines[i])
-            if m:
-                key = m.group(1)
-                reference = m.group(2)
-                indents, i = self.subsequentIndents(lines, i + 1)
-                reference += " " + indents
-
-                self.bib.setReference(key, reference)
-                continue
-
-            # Look for all @citekey patterns inside hard brackets
-            for bracket in BRACKET_RE.findall(lines[i]):
-                for c in CITE_RE.findall(bracket):
-                    self.bib.addCitation(c)
-            linesOut.append(lines[i])
-            i += 1
-
-        return linesOut
-
-
-class CitationsPattern(Pattern):
-    """Handles converting citations keys into links"""
-
-    def __init__(self, pattern, bibliography):
-        super(CitationsPattern, self).__init__(pattern)
-        self.bib = bibliography
-
-    def handleMatch(self, m):
-        span = etree.Element("span")
-        for cite_match in CITE_RE.finditer(m.group(2)):
-            id = cite_match.group(1)
-            if id in self.bib.bibsource:
-                a = etree.Element("a")
-                a.set("id", self.bib.citationID(id))
-                a.set("href", "./#" + self.bib.referenceID(id))
-                a.set("class", "citation")
-                a.text = self.bib.labels[id]
-                span.append(a)
-            else:
-                continue
-        if len(span) == 0:
-            return None
-        return span
-
-
-context_citations = None
-
-
-class CitationsExtension(Extension):
-    def __init__(self):
-        super(CitationsExtension, self).__init__()
-        self.bib = None
-
-    def extendMarkdown(self, md):
-        md.registerExtension(self)
-        self.parser = md.parser
-        self.md = md
-
-        md.preprocessors.register(CitationsPreprocessor(self.bib), "mdx_bib", 15)
-        md.inlinePatterns.register(
-            CitationsPattern(CITATION_RE, self.bib), "mdx_bib", 175
-        )
-
-
-def makeExtension(*args, **kwargs):
-    return CitationsExtension(*args, **kwargs)
-
-
-class BibTexPlugin(BasePlugin):
-    config_scheme: Tuple[Tuple[str, MkType]] = (
-        ("bibtex_file", MkType(str)),  # type: ignore[assignment]
-        ("order", MkType(str, default="unsorted")),  # type: ignore[assignment]
-    )
-
-    def __init__(self):
-        self.citations = None
-
-    def on_config(self, config, **kwargs):
-        extension = CitationsExtension()
-        self.bib = Bibliography(
-            extension,
-            self,
-            self.config["bibtex_file"],
-            self.config["order"],
-        )
-        extension.bib = self.bib
-        config["markdown_extensions"].append(extension)
-
-    def on_page_content(self, html, page, config, files):
-        html += "\n" + etree_to_string(self.bib.make_bibliography()).decode()
-        self.bib.clear_citations()
-        return html
diff --git a/docs/scripts/cards.py b/docs/scripts/cards.py
deleted file mode 100644
index f0125bebb1..0000000000
--- a/docs/scripts/cards.py
+++ /dev/null
@@ -1,276 +0,0 @@
-"""
-Adapted from pymdownx.tabbed (https://github.com/facelessuser/pymdown-extensions/)
-"""
-import re
-import xml.etree.ElementTree as etree
-
-from markdown import Extension
-from markdown.blockprocessors import BlockProcessor
-from markdown.extensions.attr_list import AttrListTreeprocessor, get_attrs
-
-
-def assign_attrs(elem, attrs):
-    """Assign `attrs` to element."""
-    for k, v in get_attrs(attrs):
-        if k == ".":
-            # add to class
-            cls = elem.get("class")
-            if cls:
-                elem.set("class", "{} {}".format(cls, v))
-            else:
-                elem.set("class", v)
-        else:
-            # assign attribute `k` with `v`
-            elem.set(AttrListTreeprocessor.NAME_RE.sub("_", k), v)
-
-
-class CardProcessor(BlockProcessor):
-    """card block processor."""
-
-    START = re.compile(r"(?:^|\n)={3} *(card)?(?: +({:.*?}) *(?:\n|$))?")
-    COMPRESS_SPACES = re.compile(r" {2,}")
-
-    def __init__(self, parser, config):
-        """Initialize."""
-
-        super().__init__(parser)
-        self.card_group_count = 0
-        self.current_sibling = None
-        self.content_indention = 0
-
-    def detab_by_length(self, text, length):
-        """Remove a card from the front of each line of the given text."""
-
-        newtext = []
-        lines = text.split("\n")
-        for line in lines:
-            if line.startswith(" " * length):
-                newtext.append(line[length:])
-            elif not line.strip():
-                newtext.append("")  # pragma: no cover
-            else:
-                break
-        return "\n".join(newtext), "\n".join(lines[len(newtext) :])
-
-    def parse_content(self, parent, block):
-        """
-        Get sibling card.
-
-        Retrieve the appropriate sibling element. This can get tricky when
-        dealing with lists.
-
-        """
-
-        old_block = block
-        non_cards = ""
-        card_set = "card-set"
-
-        # We already acquired the block via test
-        if self.current_sibling is not None:
-            sibling = self.current_sibling
-            block, non_cards = self.detab_by_length(block, self.content_indent)
-            self.current_sibling = None
-            self.content_indent = 0
-            return sibling, block, non_cards
-
-        sibling = self.lastChild(parent)
-
-        if (
-            sibling is None
-            or sibling.tag.lower() != "div"
-            or sibling.attrib.get("class", "") != card_set
-        ):
-            sibling = None
-        else:
-            # If the last child is a list and the content is indented sufficient
-            # to be under it, then the content's is sibling is in the list.
-            last_child = self.lastChild(sibling)
-            card_content = "card-content"
-            child_class = (
-                last_child.attrib.get("class", "") if last_child is not None else ""
-            )
-            indent = 0
-            while last_child is not None:
-                if (
-                    sibling is not None
-                    and block.startswith(" " * self.tab_length * 2)
-                    and last_child is not None
-                    and (
-                        last_child.tag in ("ul", "ol", "dl")
-                        or (last_child.tag == "div" and child_class == card_content)
-                    )
-                ):
-                    # Handle nested card content
-                    if last_child.tag == "div" and child_class == card_content:
-                        temp_child = self.lastChild(last_child)
-                        if temp_child is None or temp_child.tag not in (
-                            "ul",
-                            "ol",
-                            "dl",
-                        ):
-                            break
-                        last_child = temp_child
-
-                    # The expectation is that we'll find an `
- text = sibling.text
- sibling.text = ""
- p = etree.SubElement(sibling, "p")
- p.text = text
- div = sibling
- elif sibling.tag == "div" and sibling.attrib.get("class", "") == card_set:
- # Get `card-content` under `card-set`
- div = self.lastChild(sibling)
- else:
- # Pass anything else as the parent
- div = sibling
-
- self.parser.parseChunk(div, block)
-
- if non_cards:
- # Insert the card content back into blocks
- blocks.insert(0, non_cards)
-
-
-class CardExtension(Extension):
- """Add card extension."""
-
- def __init__(self, *args, **kwargs):
- """Initialize."""
-
- self.config = {
- "slugify": [
- 0,
- "Slugify function used to create card specific IDs - Default: None",
- ],
- "combine_header_slug": [
- False,
- "Combine the card slug with the slug of the parent header - "
- "Default: False",
- ],
- "separator": ["-", "Slug separator - Default: '-'"],
- }
-
- super(CardExtension, self).__init__(*args, **kwargs)
-
- def extendMarkdown(self, md):
- """Add card to Markdown instance."""
- md.registerExtension(self)
-
- config = self.getConfigs()
-
- self.card_processor = CardProcessor(md.parser, config)
-
- md.parser.blockprocessors.register(
- self.card_processor,
- "card",
- 105,
- )
-
- def reset(self):
- """Reset."""
-
- self.card_processor.card_group_count = 0
-
-
-def makeExtension(*args, **kwargs):
- """Return extension."""
-
- return CardExtension(*args, **kwargs)
diff --git a/docs/scripts/clickable_snippets.py b/docs/scripts/clickable_snippets.py
deleted file mode 100644
index ccb836e15c..0000000000
--- a/docs/scripts/clickable_snippets.py
+++ /dev/null
@@ -1,273 +0,0 @@
-# Based on https://github.com/darwindarak/mdx_bib
-import os
-from bisect import bisect_right
-from collections import defaultdict
-from typing import Tuple
-
-import jedi
-import mkdocs.structure.pages
-import parso
-import regex
-from mkdocs.config.config_options import Type as MkType
-from mkdocs.config.defaults import MkDocsConfig
-from mkdocs.plugins import BasePlugin
-
-from docs.scripts.autorefs.plugin import AutorefsPlugin
-
-try:
- from importlib.metadata import entry_points
-except ImportError:
- from importlib_metadata import entry_points
-
-
-from bs4 import BeautifulSoup
-
-# Used to match href in HTML to replace with a relative path
-HREF_REGEX = (
- r"(?<=<\s*(?:a[^>]*href|img[^>]*src)=)"
- r'(?:"([^"]*)"|\'([^\']*)|[ ]*([^ =>]*)(?![a-z]+=))'
-)
-# Maybe find something less specific ?
-PIPE_REGEX = r"(?]*>eds<\/span>
-]*>[.]<\/span>
-]*>([a-zA-Z0-9._-]*)<\/span>
-(?![a-zA-Z0-9._-])
-"""
-
-REGISTRY_REGEX = r"""(?x)
-(?]*>(?:"|&\#39;|")@([a-zA-Z0-9._-]*)(?:"|&\#39;|")<\/span>\s*
-]*>:<\/span>\s*
-]*>\s*<\/span>\s*
-]*>(?:"|&\#39;|")?([a-zA-Z0-9._-]*)(?:"|&\#39;|")?<\/span>
-(?![a-zA-Z0-9._-])
-"""
-
-CITATION_RE = r"(\[@(?:[\w_:-]+)(?: *, *@(?:[\w_:-]+))*\])"
-
-
-class ClickableSnippetsPlugin(BasePlugin):
- config_scheme: Tuple[Tuple[str, MkType]] = ()
-
- @mkdocs.plugins.event_priority(1000)
- def on_config(self, config: MkDocsConfig):
- for event_name, events in config.plugins.events.items():
- for event in list(events):
- if "autorefs" in str(event):
- events.remove(event)
- old_plugin = config["plugins"]["autorefs"]
- plugin_config = dict(old_plugin.config)
- plugin = AutorefsPlugin()
- config.plugins["autorefs"] = plugin
- config["plugins"]["autorefs"] = plugin
- plugin.load_config(plugin_config)
-
- @classmethod
- def get_ep_namespace(cls, ep, namespace=None):
- if hasattr(ep, "select"):
- return ep.select(group=namespace) if namespace else list(ep._all)
- else: # dict
- return (
- ep.get(namespace, [])
- if namespace
- else (x for g in ep.values() for x in g)
- )
-
- @mkdocs.plugins.event_priority(-1000)
- def on_post_page(
- self,
- output: str,
- page: mkdocs.structure.pages.Page,
- config: mkdocs.config.Config,
- ):
- """
- 1. Replace absolute paths with path relative to the rendered page
- This must be performed after all other plugins have run.
- 2. Replace component names with links to the component reference
-
- Parameters
- ----------
- output
- page
- config
-
- Returns
- -------
-
- """
-
- autorefs: AutorefsPlugin = config["plugins"]["autorefs"]
- ep = entry_points()
- page_url = os.path.join("/", page.file.url)
- factories_entry_points = {
- ep.name: ep.value
- for ep in (
- *self.get_ep_namespace(ep, "spacy_factories"),
- *self.get_ep_namespace(ep, "edsnlp_factories"),
- *self.get_ep_namespace(ep, "spacy_scorers"),
- )
- }
- all_entry_points = defaultdict(dict)
- for ep in self.get_ep_namespace(ep):
- if ep.group.startswith("edsnlp_") or ep.group.startswith("spacy_"):
- group = ep.group.split("_", 1)[1]
- all_entry_points[group][ep.name] = ep.value
-
- # This method is meant for replacing any component that
- # appears in a "eds.component" format, no matter if it is
- # preceded by a "@factory" or not.
- def replace_factory_component(match):
- full_match = match.group(0)
- name = "eds." + match.group(1)
- ep = factories_entry_points.get(name)
- preceding = output[match.start(0) - 50 : match.start(0)]
- if ep is not None and "DEFAULT:" not in preceding:
- try:
- url = autorefs.get_item_url(ep.replace(":", "."))
- except KeyError:
- pass
- else:
- return f"{name}"
- return full_match
-
- # This method is meant for replacing any component that
- # appears in a "@registry": "component" format
- def replace_any_registry_component(match):
- full_match = match.group(0)
- group = match.group(1)
- name = match.group(2)
- ep = all_entry_points[group].get(name)
- preceding = output[match.start(0) - 50 : match.start(0)]
- if ep is not None and "DEFAULT:" not in preceding:
- try:
- url = autorefs.get_item_url(ep.replace(":", "."))
- except KeyError:
- pass
- else:
- repl = f'{name}'
- before = full_match[: match.start(2) - match.start(0)]
- after = full_match[match.end(2) - match.start(0) :]
- return before + repl + after
- return full_match
-
- def replace_link(match):
- relative_url = url = match.group(1) or match.group(2) or match.group(3)
- if url.startswith("/"):
- relative_url = os.path.relpath(url, page_url)
- return f'"{relative_url}"'
-
- output = regex.sub(PIPE_REGEX, replace_factory_component, output)
- output = regex.sub(HTML_PIPE_REGEX, replace_factory_component, output)
- output = regex.sub(REGISTRY_REGEX, replace_any_registry_component, output)
-
- all_snippets = ""
- all_offsets = []
- all_nodes = []
-
- soups = []
-
- # Replace absolute paths with path relative to the rendered page
- for match in regex.finditer(".*?", output, flags=regex.DOTALL):
- node = match.group(0)
- if "\n" in node:
- soup, snippet, python_offsets, html_nodes = self.convert_html_to_code(
- node
- )
- size = len(all_snippets)
- all_snippets += snippet + "\n"
- all_offsets.extend([size + i for i in python_offsets])
- all_nodes.extend(html_nodes)
- soups.append((soup, match.start(0), match.end(0)))
-
- interpreter = jedi.Interpreter(all_snippets, [{}])
- line_lengths = [0]
- for line in all_snippets.split("\n"):
- line_lengths.append(len(line) + line_lengths[-1] + 1)
- line_lengths[-1] -= 1
-
- for name in self.iter_names(interpreter._module_node):
- try:
- line, col = name.start_pos
- offset = line_lengths[line - 1] + col
- node_idx = bisect_right(all_offsets, offset) - 1
-
- node = all_nodes[node_idx]
- gotos = interpreter.goto(line, col, follow_imports=True)
- gotos = [
- goto
- for goto in gotos
- if (
- goto
- and goto.full_name
- and goto.full_name.startswith("edsnlp")
- and goto.type != "module"
- )
- ]
- goto = gotos[0] if gotos else None
- if goto:
- url = autorefs.get_item_url(goto.full_name)
- # Check if node has no link in its upstream ancestors
- if not node.find_parents("a"):
- node.replace_with(
- BeautifulSoup(
- f'{node}',
- "html5lib",
- )
- )
- except Exception:
- pass
-
- # Re-insert soups into the output
- for soup, start, end in reversed(soups):
- output = output[:start] + str(soup.find("code")) + output[end:]
-
- output = regex.sub(HREF_REGEX, replace_link, output)
-
- return output
-
- @classmethod
- def iter_names(cls, root):
- if isinstance(root, parso.python.tree.Name):
- yield root
- for child in getattr(root, "children", ()):
- yield from cls.iter_names(child)
-
- @classmethod
- def convert_html_to_code(
- cls, html_content: str
- ) -> Tuple[BeautifulSoup, str, list, list]:
- pre_html_content = "" + html_content + "
"
- soup = list(BeautifulSoup(pre_html_content, "html5lib").children)[0]
- code_element = soup.find("code")
-
- line_lengths = [0]
- for line in pre_html_content.split("\n"):
- line_lengths.append(len(line) + line_lengths[-1] + 1)
- line_lengths[-1] -= 1
-
- python_code = ""
- code_offsets = []
- html_nodes = []
- code_offset = 0
-
- def extract_text_with_offsets(el):
- nonlocal python_code, code_offset
- for content in el.contents:
- # check not class md-annotation
- # Recursively process child elements
- if isinstance(content, str):
- python_code += content
- code_offsets.append(code_offset)
- code_offset += len(content)
- html_nodes.append(content)
- continue
- if "md-annotation" not in content.get("class", ""):
- extract_text_with_offsets(content)
-
- extract_text_with_offsets(code_element)
-
- return soup, python_code, code_offsets, html_nodes
diff --git a/docs/scripts/griffe_ext.py b/docs/scripts/griffe_ext.py
index deb9d1bdf6..1874cab08b 100644
--- a/docs/scripts/griffe_ext.py
+++ b/docs/scripts/griffe_ext.py
@@ -3,13 +3,17 @@
import inspect
import logging
import sys
-from typing import Union
+from typing import Any, Union
import astunparse
-from griffe import Extension, Object, ObjectNode
-from griffe.docstrings.dataclasses import DocstringSectionParameters
-from griffe.expressions import Expr
-from griffe.logger import patch_loggers
+from griffe import (
+ DocstringSectionParameters,
+ Expr,
+ Extension,
+ Object,
+ ObjectNode,
+ patch_loggers,
+)
def get_logger(name):
@@ -31,7 +35,12 @@ def __init__(self):
self.FACT_MEM = {}
self.PIPE_TO_FACT = {}
- def on_instance(self, node: Union[ast.AST, ObjectNode], obj: Object) -> None:
+ def on_instance(
+ self,
+ node: Union[ast.AST, ObjectNode],
+ obj: Object,
+ agent: Any = None,
+ ) -> None:
if (
isinstance(node, ast.Assign)
and obj.name == "create_component"
diff --git a/docs/scripts/plugin.py b/docs/scripts/plugin.py
deleted file mode 100644
index 60f2212b6f..0000000000
--- a/docs/scripts/plugin.py
+++ /dev/null
@@ -1,167 +0,0 @@
-import os
-from pathlib import Path
-
-import jedi
-import mkdocs.config
-import mkdocs.plugins
-import mkdocs.structure
-import mkdocs.structure.files
-import mkdocs.structure.nav
-import mkdocs.structure.pages
-from bs4 import BeautifulSoup
-
-
-def exclude_file(name):
- return name.startswith("assets/fragments/")
-
-
-# Add the files from the project root
-
-VIRTUAL_FILES = {}
-REFERENCE_TEMPLATE = """
-# `{ident}`
-::: {ident}
- options:
- show_source: false
-"""
-
-
-def on_files(files: mkdocs.structure.files.Files, config: mkdocs.config.Config):
- """
- Recursively the navigation of the mkdocs config
- and recursively content of directories of page that point
- to directories.
-
- Parameters
- ----------
- config: mkdocs.config.Config
- The configuration object
- kwargs: dict
- Additional arguments
- """
-
- root = Path("edsnlp")
- reference_nav = []
- for path in sorted(root.rglob("*.py")):
- module_path = path.relative_to(root.parent).with_suffix("")
- doc_path = Path("reference") / path.relative_to(root.parent).with_suffix(".md")
- # full_doc_path = Path("docs/reference/") / doc_path
- parts = list(module_path.parts)
- current = reference_nav
- for part in parts[:-1]:
- sub = next((item[part] for item in current if part in item), None)
- if sub is None:
- current.append({part: []})
- sub = current[-1][part]
- current = sub
- if parts[-1] == "__init__":
- parts = parts[:-1]
- doc_path = doc_path.with_name("index.md")
- current.append({"index.md": str(doc_path)})
- elif parts[-1] == "__main__":
- continue
- else:
- current.append({parts[-1]: str(doc_path)})
- ident = ".".join(parts)
- os.makedirs(doc_path.parent, exist_ok=True)
- VIRTUAL_FILES[str(doc_path)] = REFERENCE_TEMPLATE.format(ident=ident)
-
- for item in config["nav"]:
- if not isinstance(item, dict):
- continue
- key = next(iter(item.keys()))
- if not isinstance(item[key], str):
- continue
- if item[key].strip("/") == "reference":
- item[key] = reference_nav
-
- VIRTUAL_FILES["contributing.md"] = Path("contributing.md").read_text()
- VIRTUAL_FILES["changelog.md"] = Path("changelog.md").read_text()
-
- return mkdocs.structure.files.Files(
- [file for file in files if not exclude_file(file.src_path)]
- + [
- mkdocs.structure.files.File(
- file,
- config["docs_dir"],
- config["site_dir"],
- config["use_directory_urls"],
- )
- for file in VIRTUAL_FILES
- ]
- )
-
-
-def on_nav(nav, config, files):
- def rec(node):
- if isinstance(node, list):
- return [rec(item) for item in node]
- if node.is_section and node.title == "Code Reference":
- return
- if isinstance(node, mkdocs.structure.nav.Navigation):
- return rec(node.items)
- if isinstance(node, mkdocs.structure.nav.Section):
- if (
- len(node.children)
- and node.children[0].is_page
- and node.children[0].is_index
- ):
- first = node.children[0]
- link = mkdocs.structure.nav.Link(
- title=first.title,
- url=first.url,
- )
- link.is_index = True
- first.title = "Overview"
- node.children.insert(0, link)
- return rec(node.children)
-
- rec(nav.items)
-
-
-def on_page_read_source(page, config):
- if page.file.src_path in VIRTUAL_FILES:
- return VIRTUAL_FILES[page.file.src_path]
- return None
-
-
-# Get current git commit
-GIT_COMMIT = os.popen("git rev-parse --short HEAD").read().strip()
-
-
-@mkdocs.plugins.event_priority(-2000)
-def on_post_page(
- output: str,
- page: mkdocs.structure.pages.Page,
- config: mkdocs.config.Config,
-):
- """
- Add github links to the html output
- """
- # Find all the headings (h1, h2, ...) whose id starts with "edsnlp"
- soup = BeautifulSoup(output, "html.parser")
- for heading in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
- ref = heading.get("id", "")
- if ref.startswith("edsnlp.") and "--" not in ref:
- code = "import edsnlp; " + ref
- interpreter = jedi.Interpreter(code, namespaces=[{}])
- goto = interpreter.infer(1, len(code))
- try:
- file = goto[0].module_path.relative_to(Path.cwd())
- except Exception:
- goto = []
- if not goto:
- continue
- line = goto[0].line
- # Add a "[source]" span with a link to the source code in a new tab
- url = f"https://github.com/aphp/edsnlp/blob/{GIT_COMMIT}/{file}#L{line}"
- heading.append(
- BeautifulSoup(
- f''
- f'[source]',
- features="html.parser",
- )
- )
- # add "sourced-heading" to heading class
- heading["class"] = heading.get("class", []) + ["sourced-heading"]
- return str(soup)
diff --git a/docs/training/loggers.md b/docs/training/loggers.md
index 1b70b92472..cc06c6de59 100644
--- a/docs/training/loggers.md
+++ b/docs/training/loggers.md
@@ -22,7 +22,7 @@ You can configure loggers in `edsnlp.train` via the `logger` parameter of the `t
train:
...
logger:
- "@loggers": csv
+ "@loggers": csv !draft
...
```
@@ -50,13 +50,13 @@ You can configure loggers in `edsnlp.train` via the `logger` parameter of the `t
!!! note "Draft objects"
- `edsnlp.train` can provide a default project name and logging dir for loggers that require these parameters. For these loggers, if you don't want to set the project name yourself, you can either:
+ `edsnlp.train` can provide a default `project_name` and `logging_dir` for loggers that require these parameters. For these loggers, if you don't want to set the project name yourself, you can either:
     - call `CSVLogger.draft(...)` with the normal init parameters minus the `project_name` or `logging_dir` parameters,
- which will cause a `Draft[CSVLogger]` object to be returned if some required parameters are missing
- - or use `"@loggers": csv !draft` in the config file, which will also cause a `Draft[CSVLogger]` object to be returned if some required
- parameters are missing
- - use the shorthand `logger: ["csv", "tensorboard", ...]`, which will use the default project name and logging dir
+      which will cause a `Draft[CSVLogger]` object to be returned, which will be instantiated later when the required parameters
+ are available
+    - or use `"@loggers": csv !draft` in the config file, which is the config-file equivalent of the `.draft()` method above
+ - use the string shorthands `logger: ["csv", "tensorboard", ...]`, which will use the default project name and logging dir
The supported loggers are listed below.
diff --git a/docs/tutorials/training-ner.md b/docs/tutorials/training-ner.md
index ddad45a7b0..c3c151e823 100644
--- a/docs/tutorials/training-ner.md
+++ b/docs/tutorials/training-ner.md
@@ -165,7 +165,7 @@ Visit the [`edsnlp.train` documentation][edsnlp.training.trainer.train] for a li
span_setter: 'gold_spans'
loggers:
- - '@loggers': csv
+ - '@loggers': csv !draft
- '@loggers': rich
fields:
step: {}
diff --git a/mkdocs.yml b/mkdocs.yml
index 8a53f195e3..1271f334f5 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -3,7 +3,7 @@ site_name: EDS-NLP
repo_url: https://github.com/aphp/edsnlp
repo_name: aphp/edsnlp
-# copyright: Copyright © 2022 – Assistance Publique - Hôpitaux de Paris
+copyright: Copyright © 2025 – Assistance Publique - Hôpitaux de Paris
# extra:
# social:
@@ -145,13 +145,12 @@ nav:
- data/spark.md
- data/converters.md
- Training:
+ - training/training-api.md
- training/loggers.md
- Concepts:
- concepts/pipeline.md
- concepts/torch-component.md
- concepts/inference.md
- - Training:
- - training/training-api.md
- Metrics:
- metrics/index.md
- metrics/ner.md
@@ -180,30 +179,27 @@ extra:
extra_css:
- assets/stylesheets/extra.css
- - assets/stylesheets/cards.css
- #- assets/termynal/termynal.css
-
-extra_javascript:
- #- https://cdn.jsdelivr.net/npm/vega@5
- #- https://cdn.jsdelivr.net/npm/vega-lite@5
- #- https://cdn.jsdelivr.net/npm/vega-embed@6
- - https://polyfill.io/v3/polyfill.min.js?features=es6
- # - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js
watch:
- contributing.md
- changelog.md
- edsnlp
-hooks:
- - docs/scripts/plugin.py
-
plugins:
+ - auto_gen_pages:
+ package_dirs: ["pret"]
+ reference_section: Reference
+ exclude_glob: assets/fragments/*
+ copy_files:
+ changelog.md: changelog.md
+ contributing.md: contributing.md
+ - fix_fonts:
+ - cards:
- redirects:
redirect_maps:
'pipes/trainable/span-qualifier.md': 'pipes/trainable/span-classifier.md'
'tutorials/training.md': 'training/training-api.md'
- - search
+ - search:
- minify:
minify_html: true
minify_js: true
@@ -213,11 +209,12 @@ plugins:
cache_safe: true
- autorefs:
priority:
- - .*
+ - '*'
- reference
+ - pret_snippet_renderer:
+ - mkdocstrings_options_templates:
- mkdocstrings:
enable_inventory: true
- custom_templates: docs/assets/templates
handlers:
python:
options:
@@ -230,9 +227,13 @@ plugins:
show_root_toc_entry: false
show_signature: false
merge_init_into_class: true
+ inventories:
+ - https://aphp.github.io/confit/latest/objects.inv
+ - https://percevalw.github.io/metanno/latest/objects.inv
+ - https://percevalw.github.io/pret/latest/objects.inv
- bibtex:
bibtex_file: "docs/references.bib"
- - clickable_snippets:
+ - clickable_code:
# Just uncomment the following lines to enable i18n
# and start creating .fr.md and .en.md files.
# - i18n:
@@ -267,7 +268,6 @@ markdown_extensions:
- pymdownx.emoji:
emoji_index: !!python/name:materialx.emoji.twemoji
emoji_generator: !!python/name:materialx.emoji.to_svg
- - docs.scripts.cards
validation:
absolute_links: ignore
diff --git a/pyproject.toml b/pyproject.toml
index f64ac98ae9..2c8ddc6520 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -64,23 +64,8 @@ dev-no-ml = [
"edsnlp[docs-no-ml]",
]
docs-no-ml = [
- "mike~=1.1.2",
- "mkdocs-charts-plugin==0.0.8",
- "mkdocs-img2fig-plugin==0.9.3",
- "mkdocs-material~=9.2.0",
- "mkdocs-section-index==0.3.4",
- "mkdocs~=1.5.2",
- "mkdocstrings>=0.20,<0.28.0",
- "mkdocstrings-python~=1.1",
- "mkdocs-minify-plugin",
- "mkdocs-redirects>=1.2.1;python_version>='3.8'",
- "markdown-grid-tables==0.4.0",
- "pybtex~=0.24.0",
- "pathspec>=0.11.1", # required by vendored mkdocs-autorefs PR
- "astunparse",
- "griffe<0.39",
- "jedi",
- "html5lib",
+ "mkdocs-eds @ git+https://github.com/percevalw/mkdocs-eds.git@main#egg=mkdocs-eds ; python_version>='3.9'",
+ "markdown-grid-tables==0.4.0; python_version>='3.9'",
]
ml = [
"rich-logger>=0.3.1",
@@ -333,11 +318,6 @@ where = ["."]
[project.entry-points."spacy_tokenizers"]
"eds.tokenizer" = "edsnlp.language:create_eds_tokenizer"
-[project.entry-points."mkdocs.plugins"]
-"bibtex" = "docs.scripts.bibtex:BibTexPlugin"
-"autorefs" = "docs.scripts.autorefs.plugin:AutorefsPlugin"
-"clickable_snippets" = "docs.scripts.clickable_snippets:ClickableSnippetsPlugin"
-
[build-system]
requires = [
"setuptools",
diff --git a/tests/extract_docs_code.py b/tests/extract_docs_code.py
index 55d499ea14..26171829eb 100644
--- a/tests/extract_docs_code.py
+++ b/tests/extract_docs_code.py
@@ -115,8 +115,6 @@ def on_config(self, config: MkDocsConfig):
# After pymdownx.highlight, because of weird registering deleting the first
# extension
config["markdown_extensions"].append(self.ext)
- config["markdown_extensions"].remove("pymdownx.highlight")
- config["markdown_extensions"].remove("fenced_code")
def on_pre_build(self, *, config: MkDocsConfig):
mkdocstrings_plugin: MkdocstringsPlugin = config.plugins["mkdocstrings"]