Merge pull request #39 from ferrocene/jh/sync-with-monorepo
Synchronize with monorepo
Veykril authored Mar 21, 2024
2 parents e861e48 + fa1eab6 commit 81105a6
Showing 16 changed files with 893 additions and 261 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -3,3 +3,4 @@

__pycache__
*.py[co]
.env
3 changes: 2 additions & 1 deletion README.md
@@ -6,7 +6,8 @@
This repository contains the shared Sphinx resources we rely on for our
Sphinx documentation websites, rather than duplicating them into each site.

🚨 **The contents of this repository are strictly meant for Ferrocene use** 🚨

While this repository is public and released under an open source license,
breaking changes will happen regularly, we will provide no support, and
third-party pull requests likely won't be accepted.
186 changes: 186 additions & 0 deletions exts/ferrocene_autoglossary/__init__.py
@@ -0,0 +1,186 @@
# SPDX-License-Identifier: MIT OR Apache-2.0
# SPDX-FileCopyrightText: The Ferrocene Developers

# This extension automatically inserts links across the whole document pointing
# to the relevant terms in the glossary, without having to manually annotate
# words with :term:`Glossary Entry`.
#
# The extension requires no configuration: adding a glossary directive to the
# documentation is enough for it to work.
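#
# For example, given a (hypothetical) glossary entry somewhere in the docs:
#
#   .. glossary::
#
#      widget
#         A reusable user interface element.
#
# every later occurrence of "widget" in the prose is automatically turned
# into a link to that entry.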

from . import debug, lexer
from .lexer import Term, MatchedTerm
from dataclasses import dataclass
from docutils import nodes
from sphinx import addnodes
from sphinx.environment.collectors import EnvironmentCollector
from sphinx.transforms import SphinxTransform
import sphinx
import string


class GlossaryCollector(EnvironmentCollector):
def clear_doc(self, app, env, docname):
state = State.get(env)
state.terms = [item for item in state.terms if item.document != docname]

def merge_other(self, app, env, docnames, other):
state = State.get(env)
other_state = State.get(other)
for item in other_state.terms:
if item.document in docnames:
state.terms.append(item)

def process_doc(self, app, document):
state = State.get(app.env)
for glossary in document.findall(addnodes.glossary):
for term in glossary.findall(nodes.term):
name = term.astext()
state.terms.append(
Term(
name=name,
document=app.env.docname,
anchor=term["ids"][0],
# If the term is only made of uppercase letters we
# treat it as an abbreviation rather than a term.
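                        # (For example, a hypothetical "ABI" entry would be
                        # an abbreviation, while "Borrow" would not.)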
abbreviation=all(c in string.ascii_uppercase for c in name),
)
)


class LinkToTermsTransform(SphinxTransform):
default_priority = 100
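    # Priority 100 runs before PruneGlossaryTransform (priority 500), since
    # Sphinx applies post transforms in ascending priority order.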

def apply(self):
state = State.get(self.env)
for node in lexer.find_lexable_nodes(self.document):
self.apply_to_node(state, node.node)

def apply_to_node(self, state, node):
lexed = list(lexer.lexer(node.astext(), state.terms))
if len(lexed) == 1 and type(lexed[0]) is str:
# Do nothing if the lexed version returned the same string.
pass
else:
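            # Rebuild the node as an inline container, interleaving plain
            # text fragments with reference nodes for the matched terms.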
container = nodes.inline()
for part in lexed:
if type(part) is str:
container.append(nodes.Text(part))
elif type(part) is MatchedTerm:
container.append(self.make_link(part))
else:
raise RuntimeError("unexpected result of lexer")
node.parent.replace(node, container)

def make_link(self, matched):
node = sphinx.util.nodes.make_refnode(
self.app.builder,
self.env.docname,
matched.term.document,
matched.term.anchor,
nodes.Text(matched.text),
)
node["classes"].append("ferrocene-autoglossary")
return node


class PruneGlossaryTransform(SphinxTransform):
default_priority = 500

def apply(self):
glossaries = list(self.document.findall(addnodes.glossary))
if glossaries:
used_terms = self.discover_used_terms()
for glossary in glossaries:
self.prune(glossary, used_terms)

# We have to re-scan all the documents to see which terms are used,
# duplicating the scanning effort. Ideally we would first run the
# LinkToTermsTransform transform and collect from there the terms that were
# linked, and then pass that information to this transform.
#
# Unfortunately that's not possible, because each "post transform" is
# executed on the process handling that file, and there is no way for those
# processes to synchronize and communicate.
def discover_used_terms(self):
        # The key of this dict is the name of a term, and the value is the
        # term's "dependencies": None when the term is referenced anywhere
        # outside of glossary definitions, or the set of glossary entries
        # whose definitions reference it when it only appears there.
used_terms = dict()

state = State.get(self.env)
for docname in self.env.all_docs.keys():
doctree = self.env.get_doctree(docname)
for node in lexer.find_lexable_nodes(doctree):
for part in lexer.lexer(node.node.astext(), state.terms):
if type(part) is not MatchedTerm:
continue
name = part.term.name
                    # Merge the dependency sets, collapsing to None when
                    # either side is None.
if name not in used_terms:
used_terms[name] = node.inside_definition_of
elif used_terms[name] is not None:
if node.inside_definition_of is None:
used_terms[name] = None
else:
used_terms[name].update(node.inside_definition_of)

        # Keep resolving dependencies until an iteration makes no changes.
        # This ensures that a term "A" referenced only by term "B", which is
        # referenced only by term "C", which is referenced by the rest of
        # the text, is still kept.
changed = True
while changed:
changed = False
for term, depends_on in dict(used_terms.items()).items():
if depends_on is None:
continue
for dependency in depends_on:
if dependency in used_terms and used_terms[dependency] is None:
used_terms[term] = None
changed = True
break

return {term for term, deps in used_terms.items() if deps is None}

def prune(self, glossary, used_terms):
for item in list(glossary.findall(nodes.definition_list_item)):
for term in item.findall(nodes.term):
if term.astext() in used_terms:
break
else:
item.parent.remove(item)


@dataclass
class State:
terms: list[Term]

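    # Note: get is called on the class itself (State.get(env)), so env takes
    # the place of self.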
def get(env):
key = "ferrocene_autoglossary"
if not hasattr(env, key):
setattr(env, key, State(terms=list()))
return getattr(env, key)


def setup(app):
app.add_env_collector(GlossaryCollector)
app.add_post_transform(LinkToTermsTransform)
app.add_post_transform(PruneGlossaryTransform)

debug.setup(app)

return {
"version": "0",
"parallel_read_safe": True,
"parallel_write_safe": True,
# The version needs to be updated whenever there is a breaking change
# in the data stored in the environment. Bumping the version number
# will ensure Sphinx will do a clean build.
#
# Version history:
# - 0: initial implementation
"env_version": "0",
}
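
To make the pruning logic in discover_used_terms concrete, here is a minimal,
self-contained sketch of the same fixed-point resolution over toy data (the
term names and values are illustrative, not part of the extension):

# None means "referenced from normal prose"; a set of names means "only
# referenced from the definitions of those glossary terms".
used_terms = {
    "A": {"B"},  # "A" only appears inside the definition of "B"
    "B": {"C"},  # "B" only appears inside the definition of "C"
    "C": None,   # "C" is referenced from the main text
    "D": {"E"},  # "D" is only referenced by "E", which is never used
}

changed = True
while changed:
    changed = False
    for term, depends_on in dict(used_terms.items()).items():
        if depends_on is None:
            continue
        # Keep this term if any entry whose definition mentions it has
        # itself been kept (resolved to None).
        if any(dep in used_terms and used_terms[dep] is None for dep in depends_on):
            used_terms[term] = None
            changed = True

print(sorted(term for term, deps in used_terms.items() if deps is None))
# ['A', 'B', 'C'] -- "D" is pruned because nothing keeps "E" alive
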
74 changes: 74 additions & 0 deletions exts/ferrocene_autoglossary/debug.py
@@ -0,0 +1,74 @@
# SPDX-License-Identifier: MIT OR Apache-2.0
# SPDX-FileCopyrightText: The Ferrocene Developers

# This module appends to a file all lexed nodes that have at least one match
# within them, if the AUTOGLOSSARY_DEBUG_FILE environment variable is set when
# building the docs (and contains the path to that file). It is helpful when
# making changes to the lexer.
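#
# Each line appended to the file has the form (hypothetical example):
#
#   docs/index.rst:12: see the [term] section for details
#
# with every matched term wrapped in square brackets.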

from . import lexer
from .lexer import MatchedTerm
import os


def write_lexer_outcome(app, env):
debug_file = os.environ.get("AUTOGLOSSARY_DEBUG_FILE")
if debug_file is None or app.builder.name == "ferrocene-intersphinx":
return

    # The problem this tries to prevent is that the build system changes the
    # current directory before invoking Sphinx, so a relative path would
    # place the file somewhere the user does not expect.
#
# This would be fixable by having the build system resolve the path before
# providing it to the extension, but it doesn't make sense to add that
# complexity for a rarely-used debug feature.
if not os.path.isabs(debug_file):
raise RuntimeError("AUTOGLOSSARY_DEBUG_FILE must be an absolute path")
output = open(debug_file, "a")

from . import State # Imported here to avoid circular dependencies

terms = State.get(env).terms

for docname in sorted(env.found_docs):
doctree = env.get_doctree(docname)
for node in lexer.find_lexable_nodes(doctree):
rendered = render_lexed_node(terms, node.node)
if rendered is None:
continue
rendered = rendered.replace("\n", " ")
output.write(f"{node_location(node.node)}: {rendered}\n")


def render_lexed_node(terms, node):
result = ""
has_matches = False

for token in lexer.lexer(node.astext(), terms):
if type(token) is MatchedTerm:
result += f"[{token.text}]"
has_matches = True
elif type(token) is str:
result += token
else:
raise RuntimeError("invalid token type")

if has_matches:
return result


def node_location(node):
if (
getattr(node, "source", None) is not None
and getattr(node, "line", None) is not None
):
return f"{node.source}:{node.line}"
elif node.parent is not None:
return node_location(node.parent)
else:
return "<unknown>"


def setup(app):
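    # env-check-consistency fires once, after all documents have been read,
    # so the full set of collected terms is available when the file is
    # written.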
app.connect("env-check-consistency", write_lexer_outcome)