Skip to content

Commit

Permalink
Enhancements for the Concepts macro (#1093)
Browse files Browse the repository at this point in the history
This PR:
- Makes concepts throw an error when a wikidata ID is provided without a
label. Fixes #1076
- Uses lowercase wikidata labels in the concept tags, and only
capitalizes them when generating links. Fixes #1077
- Disallows newlines in quoted parts of the macro. Fixes #1092 
- Correctly finds Agda definitions which have single-quotes in them.
Fixes #1075
- Adds "(disambiguation)" to concept names in the database, when a
disambiguation is provided
  • Loading branch information
VojtechStep committed Mar 23, 2024
1 parent 1d6bcb1 commit cacce49
Show file tree
Hide file tree
Showing 10 changed files with 63 additions and 19 deletions.
64 changes: 54 additions & 10 deletions scripts/preprocessor_concepts.py
Expand Up @@ -11,13 +11,16 @@
from utils import eprint

CONCEPT_REGEX = re.compile(
r'\{\{#concept "([^"]+)"(.*?)\}\}')
r'\{\{#concept "([^=\n"]+)"(.*?)\}\}')

WIKIDATA_ID_REGEX = re.compile(
r'WDID=(\S+)')

WIKIDATA_LABEL_REGEX = re.compile(
r'WD="([^"]+)"')
r'WD="([^=\n"]+)"')

DISAMBIGUATION_REGEX = re.compile(
r'Disambiguation="([^=\n"]+)"')

AGDA_REGEX = re.compile(
r'Agda=(\S+)')
Expand All @@ -30,10 +33,26 @@

LEFTOVER_CONCEPT_REGEX = re.compile(r'\{\{#concept.*')

# Python's html.escape transforms a single quote into ', but Agda
# transforms it into ', so we just rewrite the replacement ourselves.
#
# https://github.com/jaspervdj/blaze-markup/blob/master/src/Text/Blaze/Renderer/String.hs#L25
ESCAPE_TRANSLATION_TABLE = str.maketrans({
'<': '&lt;',
'>': '&gt;',
'&': '&amp;',
'"': '&quot;',
"'": '&#39;'
})


def agda_escape_html(string):
return string.translate(ESCAPE_TRANSLATION_TABLE)


def make_definition_regex(definition):
return re.compile(
r'<a id="(\d+)" href="[^"]+" class="[^"]+">' + re.escape(definition) + r'</a>')
r'<a id="(\d+)" href="[^"]+" class="[^"]+">' + re.escape(agda_escape_html(definition)) + r'</a>')


def does_support(backend):
Expand All @@ -54,6 +73,13 @@ def match_wikidata_label(meta_text):
return m.group(1)


def match_disambiguation(meta_text):
m = DISAMBIGUATION_REGEX.search(meta_text)
if m is None:
return None
return m.group(1)


def match_agda_name(meta_text):
m = AGDA_REGEX.search(meta_text)
if m is None:
Expand All @@ -66,15 +92,13 @@ def get_definition_id(name, content):
m = definition_regex.search(content)
if m is None:
return None

return m.group(1)


def slugify_markdown(md):
text = md.replace(' ', '-')
for markup_char in ['*', '_', '~', '(', ')']:
text = text.replace(markup_char, '')

return text


Expand All @@ -89,28 +113,50 @@ def sub_match_for_concept(m, mut_index, mut_error_locations, config, path, initi
metadata = m.group(2)
wikidata_id = match_wikidata_id(metadata)
wikidata_label = match_wikidata_label(metadata)
disambiguation = match_disambiguation(metadata)
agda_name = match_agda_name(metadata)
plaintext = LINK_REGEX.sub(r'\1', text)
url_path = path[:-2] + 'html'
entry_name = plaintext
if disambiguation is not None:
entry_name += ' (' + disambiguation + ')'
index_entry = {
'name': plaintext,
'name': entry_name,
'text': text
}
anchor = ''
target = ''
target_id = f'concept-{slugify_markdown(plaintext)}'
references = []
if wikidata_id is not None and wikidata_id != 'NA':

has_wikidata_id = wikidata_id is not None and wikidata_id != 'NA'
has_wikidata_label = wikidata_label is not None

if has_wikidata_id:
index_entry['wikidata'] = wikidata_id
# index_entry['link'] = f'{url_path}#{wikidata_id}'
target_id = wikidata_id

if has_wikidata_label:
index_entry['__wikidata_label'] = wikidata_label
else:
eprint('Warning: Wikidata identifier', wikidata_id,
'provided for "' + entry_name + '"',
'without a corresponding label in', path)
mut_error_locations.add(path)
# Useful if we wanted to link to a concept by WDID and give information
# to scrapers; currently we don't really want either
# anchor += f'<a id="{target_id}" class="wikidata"><span style="display:none">{plaintext}</span></a>'

# TODO: decide if we want this
# references.append(sup_link_reference(config.get(
# 'mathswitch-template').format(wikidata_id=wikidata_id), 'WD', True, True))
else:
if has_wikidata_label:
eprint('Warning: Wikidata label', wikidata_label,
'provided for "' + entry_name + '"',
'without a corresponding identifier in', path)
mut_error_locations.add(path)
if agda_name is not None:
target_id = f'concept-{agda_name}'
agda_id = get_definition_id(agda_name, initial_content)
Expand All @@ -132,8 +178,6 @@ def sub_match_for_concept(m, mut_index, mut_error_locations, config, path, initi
# as we can get
index_entry['id'] = index_entry['link']
references.append(sup_link_reference(f'#{target_id}', '¶', False))
if wikidata_label is not None:
index_entry['__wikidata_label'] = wikidata_label
mut_index.append(index_entry)
return f'{anchor}<b>{text}</b>{"".join(reversed(references))}'

Expand All @@ -159,7 +203,7 @@ def tag_concepts_chapter_rec_mut(chapter, config, mut_index, mut_error_locations
mathswitch_link = config.get(
'mathswitch-template').format(wikidata_id=wikidata_id)
external_references.append(
f'<a href="{mathswitch_link}">{wikidata_label}</a> at Mathswitch')
f'<a href="{mathswitch_link}">{wikidata_label.capitalize()}</a> at Mathswitch')
wikidata_link = config.get(
'wikidata-template').format(wikidata_id=wikidata_id)
# TODO: Decide if we want this
Expand Down
2 changes: 1 addition & 1 deletion src/graph-theory/acyclic-undirected-graphs.lagda.md
Expand Up @@ -19,7 +19,7 @@ open import graph-theory.undirected-graphs
## Idea

An
{{#concept "acyclic undirected graph" WDID=Q3115453 WD="Acyclic graph" Agda=is-acyclic-Undirected-Graph}}
{{#concept "acyclic undirected graph" WDID=Q3115453 WD="acyclic graph" Agda=is-acyclic-Undirected-Graph}}
is an [undirected graph](graph-theory.undirected-graphs.md) of which the
[geometric realization](graph-theory.geometric-realizations-undirected-graphs.md)
is [contractible](foundation-core.contractible-types.md).
Expand Down
2 changes: 1 addition & 1 deletion src/graph-theory/circuits-undirected-graphs.lagda.md
Expand Up @@ -21,7 +21,7 @@ open import graph-theory.undirected-graphs

## Idea

A {{#concept "circuit" Agda=circuit-Undirected-Graph WD="Cycle" WDID=Q245595}}
A {{#concept "circuit" Agda=circuit-Undirected-Graph WD="cycle" WDID=Q245595}}
in an [undirected graph](graph-theory.undirected-graphs.md) `G` consists of a
[`k`-gon](graph-theory.polygons.md) `H` equipped with a
[totally faithful](graph-theory.totally-faithful-morphisms-undirected-graphs.md)
Expand Down
2 changes: 1 addition & 1 deletion src/graph-theory/closed-walks-undirected-graphs.lagda.md
Expand Up @@ -22,7 +22,7 @@ open import graph-theory.undirected-graphs
## Idea

A
{{#concept "closed walk" Agda=closed-walk-Undirected-Graph Disambiguation="undirected graph" WDID=Q245595 WD="Cycle"}}
{{#concept "closed walk" Agda=closed-walk-Undirected-Graph Disambiguation="undirected graph" WDID=Q245595 WD="cycle"}}
of length `k : ℕ` in an [undirected graph](graph-theory.undirected-graphs.md)
`G` is a [morphism](graph-theory.morphisms-undirected-graphs.md) of graphs from
a [`k`-gon](graph-theory.polygons.md) into `G`.
Expand Down
2 changes: 1 addition & 1 deletion src/graph-theory/complete-bipartite-graphs.lagda.md
Expand Up @@ -27,7 +27,7 @@ open import univalent-combinatorics.finite-types

Consider two [finite sets](univalent-combinatorics.finite-types.md) `X` and `Y`.
The
{{#concept "complete bipartite graph" Agda=complete-bipartite-Undirected-Graph-𝔽 WDID=Q913598 WD="Complete bipartite graph"}}
{{#concept "complete bipartite graph" Agda=complete-bipartite-Undirected-Graph-𝔽 WDID=Q913598 WD="complete bipartite graph"}}
on `X` and `Y` is the [undirected finite graph](graph-theory.finite-graphs.md)
consisting of:

Expand Down
2 changes: 1 addition & 1 deletion src/graph-theory/complete-multipartite-graphs.lagda.md
Expand Up @@ -26,7 +26,7 @@ open import univalent-combinatorics.function-types

Consider a family of [finite types](univalent-combinatorics.finite-types.md) `Y`
indexed by a finite type `X`. The
{{#concept "complete multipartite graph" Agda=complete-multipartite-Undirected-Graph-𝔽 WD="Multipartite graph" WDID=Q1718082}}
{{#concept "complete multipartite graph" Agda=complete-multipartite-Undirected-Graph-𝔽 WD="multipartite graph" WDID=Q1718082}}
at `Y` is the [finite undirected graph](graph-theory.finite-graphs.md)
consisting of:

Expand Down
2 changes: 1 addition & 1 deletion src/graph-theory/complete-undirected-graphs.lagda.md
Expand Up @@ -20,7 +20,7 @@ open import univalent-combinatorics.finite-types
## Idea

A
{{#concept "complete undirected graph" Agda=complete-Undirected-Graph-𝔽 WD="Complete graph" WDID=Q45715}}
{{#concept "complete undirected graph" Agda=complete-Undirected-Graph-𝔽 WD="complete graph" WDID=Q45715}}
is a [complete multipartite graph](graph-theory.complete-multipartite-graphs.md)
in which every block has exactly one vertex. In other words, it is an
[undirected graph](graph-theory.undirected-graphs.md) in which every vertex is
Expand Down
2 changes: 1 addition & 1 deletion src/graph-theory/cycles-undirected-graphs.lagda.md
Expand Up @@ -22,7 +22,7 @@ open import graph-theory.undirected-graphs
## Idea

A
{{#concept "cycle" Agda=cycle-Undirected-Graph Disambiguation="undirected graph" WD="Cycle" WDID=Q245595}}
{{#concept "cycle" Agda=cycle-Undirected-Graph Disambiguation="undirected graph" WD="cycle" WDID=Q245595}}
in an [undirected graph](graph-theory.undirected-graphs.md) `G` consists of a
[`k`-gon](graph-theory.polygons.md) `H` equipped with an
[embedding of graphs](graph-theory.embeddings-undirected-graphs.md) from `H`
Expand Down
Expand Up @@ -20,7 +20,7 @@ open import univalent-combinatorics.standard-finite-types
## Idea

A
{{#concept "directed graph structure" WD="Directed graph" WDID=Q1137726 Agda=structure-directed-graph-Fin}}
{{#concept "directed graph structure" WD="directed graph" WDID=Q1137726 Agda=structure-directed-graph-Fin}}
on a [standard finite set](univalent-combinatorics.standard-finite-types.md)
`Fin n` is a [binary type-valued relation](foundation.binary-relations.md)

Expand Down
2 changes: 1 addition & 1 deletion src/real-numbers/dedekind-real-numbers.lagda.md
Expand Up @@ -42,7 +42,7 @@ open import foundation-core.truncation-levels
## Idea

A
{{#concept "Dedekind cut" Agda=is-dedekind-cut WD="Dedekind cut" WDID=Q851333}}
{{#concept "Dedekind cut" Agda=is-dedekind-cut WD="dedekind cut" WDID=Q851333}}
consists of a [pair](foundation.dependent-pair-types.md) `(L , U)` of
[subtypes](foundation-core.subtypes.md) of
[the rational numbers](elementary-number-theory.rational-numbers.md) `ℚ`,
Expand Down

0 comments on commit cacce49

Please sign in to comment.