Skip to content

Commit

Permalink
basic inscription report
Browse files Browse the repository at this point in the history
  • Loading branch information
thvitt committed Apr 5, 2019
1 parent 65356cf commit 713b8a8
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 26 deletions.
7 changes: 5 additions & 2 deletions src/macrogen/graph.py
Expand Up @@ -315,7 +315,7 @@ def _load_from(self, load_from: Path):
self.closure = nx.transitive_closure(self.dag)
self._augment_details()

def node(self, spec: Union[Reference, date, str]):
def node(self, spec: Union[Reference, date, str], default=KeyError):
"""
Returns a node from the graph.
Args:
Expand Down Expand Up @@ -361,7 +361,10 @@ def first(iterable):
or _normalize_sigil(ref.label) == norm_spec))

except StopIteration:
raise KeyError("No node matching {!r} in the base graph.".format(spec))
if isinstance(default, KeyError):
raise KeyError("No node matching {!r} in the base graph.".format(spec))
else:
return default

def nodes(self, node_str: str, check: bool = False) -> List[Node]:
"""
Expand Down
10 changes: 1 addition & 9 deletions src/macrogen/main.py
Expand Up @@ -27,15 +27,7 @@ def main(argv=sys.argv):

if not options.skip_reports:
report.write_order_xml(graphs)
report.report_help()
report.report_refs(graphs)
report.report_scenes(graphs)
report.report_missing(graphs)
report.report_components(graphs)
report.report_conflicts(graphs)
report.report_sources(graphs)
report.report_index(graphs)
report.report_downloads(graphs)
report.generate_reports(graphs)
render_all()


Expand Down
71 changes: 58 additions & 13 deletions src/macrogen/report.py
Expand Up @@ -2,6 +2,7 @@
import os
from collections import defaultdict, Counter
from datetime import date, datetime
from functools import partial
from itertools import chain, repeat, groupby
from operator import itemgetter

Expand All @@ -16,15 +17,15 @@
import csv
from html import escape
from pathlib import Path
from typing import Iterable, List, Dict, Mapping, Tuple, Sequence, Union, Generator, Any, Optional
from typing import Iterable, List, Dict, Mapping, Tuple, Sequence, Union, Generator, Any, Optional, Set

import networkx as nx
from pandas import DataFrame

from .config import config
from .bibliography import BiblSource
from .graph import MacrogenesisInfo, EARLIEST, LATEST, DAY, Node, MultiEdge
from .graphutils import pathlink, collapse_timeline, expand_edges, in_path
from .graphutils import pathlink, collapse_timeline, expand_edges, in_path, remove_edges
from .datings import get_datings, AbsoluteDating
from .uris import Reference, Witness, Inscription, UnknownRef, AmbiguousRef
from .visualize import write_dot, simplify_graph
Expand Down Expand Up @@ -520,15 +521,15 @@ def report_downloads(graphs: MacrogenesisInfo):
<h4>Nodes</h4>
<p>The <strong>nodes</strong> are either URIs or dates in ISO 8601 format. URIs of the form
<code>faust://document/<var>scheme</var>/<var>sigil</var></code> denote a witness (document)
that has the identifier <var>sigil</var> in the respective identifier scheme.
<code>faust://inscription/<var>scheme</var>/<var>sigil</var>/<var>id</var></code> denote an
that has the identifier <var>sigil</var> in the respective identifier scheme.
<code>faust://inscription/<var>scheme</var>/<var>sigil</var>/<var>id</var></code> denote an
inscription (single “writing event”) on the respective document.</p>
<p>If some URI has a <var>scheme</var> ≠ <code>faustedition</code>, then it was not possible to map it to
a document in the edition. You may still try the sigil with the search. Otherwise, the document can be
displayed at <code>http://faustedition.net/document?sigil=<var>sigil</var></code>.
</p>
<p>Dates are always of the form YYYY-MM-DD.</p>
<h4>Edges</h4>
<p>The edges have attributes that describe them further:</p>
<table class="pure-table">
Expand Down Expand Up @@ -578,7 +579,7 @@ def report_downloads(graphs: MacrogenesisInfo):
with this assertion</td>
</tr>
</table>
<h4>MacrogenesisInfo</h4>
<p>The <a href='macrogen-info.zip'>macrogen-info.zip</a> file contains the data required to recreate the graph info
in the <a href='https://github.com/faustedition/faust-macrogen'>faust-macrogen</a> library. To do so, run:</p>
Expand Down Expand Up @@ -867,7 +868,7 @@ def report_index(graphs):
graph_options=dict(controlIconsEnabled=True, maxZoom=200))


def report_help():
def report_help(info: Optional[MacrogenesisInfo] = None):
target = config.path.report_dir

def demo_graph(u, v, extend=None, **edge_attr) -> nx.MultiDiGraph:
Expand Down Expand Up @@ -1070,8 +1071,8 @@ def report_stats(graphs: MacrogenesisInfo):

# now collect some info per witness:
for ref in refs:
preds = list(graphs.base.pred[ref])
succs = list(graphs.base.succ[ref])
preds = list(graphs.base.pred[ref]) if ref in graphs.base else []
succs = list(graphs.base.succ[ref]) if ref in graphs.base else []

pred_dates = [p for p in preds if isinstance(p, date)]
succ_dates = [s for s in succs if isinstance(s, date)]
Expand Down Expand Up @@ -1110,14 +1111,14 @@ def _dating_table():
<h2>Kanten (Aussagen)</h2>
<p>{len(edge_df)} Kanten, {(edge_df.kind != 'timeline').sum()} Datierungsaussagen:</p>
<pre>{edge_df.kind.value_counts()}</pre>
<p>{edge_df.ignore.sum()} Aussagen (manuell) ignoriert,
{edge_df.delete.sum()} widersprüchliche Aussagen (automatisch) ausgeschlossen
<p>{edge_df.ignore.sum()} Aussagen (manuell) ignoriert,
{edge_df.delete.sum()} widersprüchliche Aussagen (automatisch) ausgeschlossen
({len(edge_df[edge_df.delete].groupby(['start', 'end']))} ohne Parallelaussagen)
</p>
<h2>Absolute Datierungen</h2>
<table class="pure-table">
<thead><tr><td/><th>direkt</th><th>erschlossen</th><th>angepasst</th><th>fehlend</th></tr></thead>
<tbody>
<tr><th>Datumsuntergrenze</th>
Expand All @@ -1138,3 +1139,47 @@ def _dating_table():
write_html(config.path.report_dir / "stats.php", html, "Statistik")

return stat, dating_stat, edge_df



def report_inscriptions(info: MacrogenesisInfo):
    """
    Writes ``inscriptions.php``, a table that lists, for each document, the
    inscriptions referenced in the macrogenesis graph next to the inscriptions
    found in the document's textual transcript.

    A document gets a row if it has inscriptions in its transcript, in the
    graph, or both. Rows are ordered by the index of the matching graph node;
    documents without a node in the graph are sorted to the end.

    Args:
        info: the macrogenesis info whose base graph is inspected
    """
    # Imported locally as in the original code -- presumably to avoid a
    # circular import between the modules (TODO confirm).
    from .witnesses import all_documents

    docs = all_documents()
    docs_by_uri = {doc.uri: doc for doc in docs}

    # URIs of all documents that have inscriptions in their textual transcript
    transcript_uris = {doc.uri for doc in docs if doc.inscriptions}

    # inscription ids used in the graph, grouped by the URI of their witness
    graph_inscriptions: Dict[str, Set[str]] = defaultdict(set)
    for ref in info.base:
        if isinstance(ref, Inscription):
            graph_inscriptions[ref.witness.uri].add(ref.inscription)

    relevant_uris = transcript_uris.union(graph_inscriptions)

    table = (HtmlTable()
             .column('Dokument', lambda uri: _fmt_node(Witness.get(uri)))
             .column('Inskriptionen Makrogenese')
             .column('Inskriptionen Transkript'))

    def uri_idx(uri):
        wit = info.node(uri, None)
        return wit.index if wit else 9999

    # The URI is a secondary sort key so that documents sharing an index
    # (notably all those without a graph node) come out in a stable order
    # instead of set iteration order.
    for doc_uri in sorted(relevant_uris, key=lambda uri: (uri_idx(uri), uri)):
        # .get() avoids inserting empty entries into the defaultdict, and
        # sorted() makes the cell content independent of set iteration order.
        from_graph = sorted(graph_inscriptions.get(doc_uri, ()))
        if doc_uri in docs_by_uri:
            from_transcript = '<br/>'.join(docs_by_uri[doc_uri].inscriptions)
        else:
            from_transcript = ''
        table.row((doc_uri, '<br/>'.join(from_graph), from_transcript))

    write_html(config.path.report_dir / 'inscriptions.php',
               table.format_table(),
               'Inskriptionen')



def generate_reports(info: MacrogenesisInfo):
    """
    Runs every report of this module.

    A report is any callable module-level name starting with ``report_``. The
    reports run in the order in which these names appear in the module
    namespace, and each one is called with ``info`` as its only argument.

    Args:
        info: the macrogenesis info passed on to each report function
    """
    # Only callables are collected: a plain module-level value that happens to
    # be called report_<something> must not be invoked as a report.
    report_functions = [fn for name, fn in globals().items()
                        if name.startswith('report_') and callable(fn)]
    for report in report_functions:
        report(info)
2 changes: 1 addition & 1 deletion src/macrogen/visualize.py
Expand Up @@ -233,7 +233,7 @@ def render_all(timeout=None):
with Pool() as pool:
global _render_queue
dots, _render_queue = _render_queue, []
result = list(tqdm(pool.imap_unordered(partial(render_file_alt, timeout=timeout), dots),
result = list(config.progress(pool.imap_unordered(partial(render_file_alt, timeout=timeout), dots),
desc='Rendering', total=len(dots), unit=' SVGs'))
not_rendered = [entry for entry in result if isinstance(entry, tuple)]
timeout = [path for path, err in not_rendered if isinstance(err, subprocess.TimeoutExpired)]
Expand Down
34 changes: 33 additions & 1 deletion src/macrogen/witnesses.py
@@ -1,4 +1,5 @@
import re
from collections import defaultdict
from pathlib import Path
from typing import List, Optional
import reprlib
Expand Down Expand Up @@ -35,6 +36,8 @@ def faust_uri(sigil: str, idno_type: Optional[str] = None, inscription: Optional
components.append(inscription)
return '/'.join(components)

def _ids(reference: str):
return [s.strip('#') for s in reference.split()]

class Document:

Expand All @@ -47,12 +50,41 @@ def __init__(self, source: Path):
tt_el: etree._Element = tree.find('//f:textTranscript', config.namespaces)
if tt_el is not None:
self.text_transcript: Path = config.path.data.joinpath(tt_el.base.replace('faust://xml/', ''), tt_el.get('uri'))
transcript: etree._Element = etree.parse(fspath(self.text_transcript))
transcript: etree._ElementTree = etree.parse(fspath(self.text_transcript))
self.inscriptions = transcript.xpath('//tei:change[@type="segment"]/@xml:id', namespaces=config.namespaces)
else:
self.text_transcript = None
self.inscriptions = []

def _verses(self, text_transcript: Optional[etree._ElementTree] = None):
    """
    Collects the line numbers associated with each inscription.

    Every ``tei:l`` with an ``n`` attribute and every
    ``tei:milestone[@unit="reflines"]`` is examined. Its line numbers are added
    to the inscriptions named in the ``change`` attribute of the closest
    preceding ``tei:milestone[@unit="stage"]``, or to the key ``''`` if there
    is no such milestone. They are also added to every inscription named in a
    ``change`` attribute on the line itself or on one of its descendants.

    Args:
        text_transcript: the parsed textual transcript. If omitted, it is
            parsed from ``self.text_transcript``.

    Returns:
        a defaultdict mapping inscription id to a list of line numbers. It is
        empty if the document has no inscriptions.
    """
    insc_lines = defaultdict(list)
    if not self.inscriptions:
        # Nothing to assign. Returning before parsing also covers documents
        # without a textual transcript, where self.text_transcript is None.
        return insc_lines

    if text_transcript is None:
        text_transcript = etree.parse(fspath(self.text_transcript))

    namespaces = config.namespaces
    lines = text_transcript.xpath('//tei:l[@n]', namespaces=namespaces) + \
            text_transcript.xpath('//tei:milestone[@unit="reflines"]', namespaces=namespaces)

    for line in lines:
        # the reflines milestones are not filtered by @n, so it may be missing
        linenos = _ids(line.get('n', ''))

        # xpath() returns a list, here with at most one element: the closest
        # preceding stage milestone
        stages = line.xpath('preceding::tei:milestone[@unit="stage"][1]', namespaces=namespaces)
        if stages:
            for insc in _ids(stages[0].get('change', '')):
                insc_lines[insc].extend(linenos)
        else:
            insc_lines[''].extend(linenos)

        # attribute results are strings; an empty result just skips the loop
        for change in line.xpath('descendant-or-self::*/@change'):
            for insc in _ids(change):
                insc_lines[insc].extend(linenos)

    return insc_lines





@property
def uri(self) -> str:
return faust_uri(self.sigil)
Expand Down

0 comments on commit 713b8a8

Please sign in to comment.