Skip to content

Commit

Permalink
Improved inscription problems table. Fixes #24.
Browse files Browse the repository at this point in the history
See http://dev.faustedition.net/macrogenesis/inscriptions -- cf. the
description at the end of the page, below the table.
  • Loading branch information
thvitt committed Apr 5, 2019
1 parent 713b8a8 commit 8f8d0c4
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 21 deletions.
82 changes: 61 additions & 21 deletions src/macrogen/report.py
@@ -1,32 +1,29 @@
import csv
import json
import os
import urllib
from collections import defaultdict, Counter
from datetime import date, datetime
from functools import partial
from html import escape
from itertools import chain, repeat, groupby
from operator import itemgetter
from pathlib import Path
from typing import Iterable, List, Dict, Mapping, Tuple, Sequence, Union, Generator, Optional, Set

import networkx as nx
import pandas as pd
import requests
import urllib
from lxml import etree
from lxml.builder import ElementMaker
from lxml.etree import Comment
from more_itertools import pairwise

import csv
from html import escape
from pathlib import Path
from typing import Iterable, List, Dict, Mapping, Tuple, Sequence, Union, Generator, Any, Optional, Set

import networkx as nx
from pandas import DataFrame

from .config import config
from .bibliography import BiblSource
from .graph import MacrogenesisInfo, EARLIEST, LATEST, DAY, Node, MultiEdge
from .graphutils import pathlink, collapse_timeline, expand_edges, in_path, remove_edges
from .config import config
from .datings import get_datings, AbsoluteDating
from .graph import MacrogenesisInfo, EARLIEST, LATEST, DAY, Node
from .graphutils import pathlink, collapse_timeline, expand_edges, in_path, remove_edges
from .uris import Reference, Witness, Inscription, UnknownRef, AmbiguousRef
from .visualize import write_dot, simplify_graph

Expand Down Expand Up @@ -195,8 +192,8 @@ def format_table(self, rows=None, row_attrs=None):
row_attrs = self.row_attrs
if row_attrs is None:
row_attrs = repeat({})
return self._format_header() + ''.join(
(self._format_row(row, **attrs) for row, attrs in zip(rows, row_attrs))) + self._format_footer()
return self._format_header() + '\n' + '\n'.join(
(self._format_row(row, **attrs) for row, attrs in zip(rows, row_attrs))) + '\n' + self._format_footer()


def write_html(filename: Path, content: str, head: str = None, breadcrumbs: List[Dict[str, str]] = [],
Expand Down Expand Up @@ -826,6 +823,7 @@ def report_index(graphs):
('missing', 'Fehlendes',
'Zeugen, zu denen keine Aussagen zur Makrogenese vorliegen, und unbekannte Zeugen'),
('sources', 'Quellen', 'Aussagen nach Quelle aufgeschlüsselt'),
('inscriptions', 'Inskriptionen', 'Inskriptionen in Makrogenese bzw. Transkript'),
('dag', 'sortierrelevanter Gesamtgraph',
'Graph aller für die Sortierung berücksichtigter Aussagen (einzoomen!)'),
('tred', 'transitive Reduktion',
Expand Down Expand Up @@ -1154,27 +1152,69 @@ def report_inscriptions(info: MacrogenesisInfo):
inscriptions_from_graph = [ref for ref in info.base if isinstance(ref, Inscription)]
graph_inscriptions: Dict[str, Set[str]] = defaultdict(set)
for inscr in inscriptions_from_graph:
graph_inscriptions[inscr.witness.uri].add(inscr.inscription)
graph_inscriptions[inscr.witness.uri].add(inscr)

relevant_uris = {uri for uri in set(tt_inscriptions.keys()).union(graph_inscriptions.keys())}
# stripped = remove_edges(info.base, lambda _, __ ,attr: attr.get('copy'))
stripped = remove_edges(info.base, lambda _, __ ,attr: attr.get('copy') or attr.get('kind') in ['inscription', 'orphan'])

table = (HtmlTable()
.column('Dokument', lambda uri: _fmt_node(Witness.get(uri)))
.column('Inskriptionen Makrogenese')
.column('Inskriptionen Transkript'))
.column('Inskriptionen Transkript')
.column('Dok.-Aussagen'))

def uri_idx(uri):
wit = info.node(uri, None)
return wit.index if wit else 9999

def ghlink(path: Path):
ghroot = config.xmlroot[:config.xmlroot.rindex('/')+1] # strip /macrogenesis
relative = str(path.relative_to(config.path.data))
return ghroot + relative



for doc_uri in sorted(relevant_uris, key=uri_idx):
wit = info.node(doc_uri, None)
if doc_uri in docs_by_uri:
document = docs_by_uri[doc_uri]
doc_tt_inscriptions = document.inscriptions
if document.text_transcript:
ttlink = ghlink(document.text_transcript)
if doc_tt_inscriptions:
transcript_links = '<br/>'.join(
f'<a href="{ttlink}#L{i.getparent().sourceline}">{i}</a>' for i in sorted(doc_tt_inscriptions))
else:
transcript_links = f'<a href="{ttlink}">(keine)</a>'
else:
transcript_links = f'<a href="{ghlink(document.source)}">(kein Transkript)</a>'
else:
transcript_links = '(unbekanntes Dokument)'
table.row((doc_uri,
'<br/>'.join(wit #f'<a href="{wit.filename.stem}">{wit.inscription}</a>'
for wit in graph_inscriptions[doc_uri]),
'<br/>'.join(docs_by_uri[doc_uri].inscriptions) if doc_uri in docs_by_uri else ''))
'<br/>'.join(f'<a href="{wit.filename.stem}">{wit.inscription}</a>'
for wit in sorted(graph_inscriptions[doc_uri])),
transcript_links,
stripped.in_degree(wit) + stripped.out_degree(wit) if wit and wit in stripped else ''))
write_html(config.path.report_dir / 'inscriptions.php',
table.format_table(),
table.format_table() + """
<h2>Erläuterungen</h2>
<p>
Im Idealfall stehen in den Makrogenese- und Transkriptspalten jedes Dokuments genau dieselben Inskriptionen und
in der Spalte <em>Dok.-Aussagen</em> eine 0. Abweichungen können wie folgt erklärt werden:</p>
<ul>
<li>Inskriptionen, die <strong>nur in der Makrogenesespalte</strong> auftauchen, sind entweder falsch
benannt oder nicht im Textuellen Transkript verzeichnet. Über den Link zur Inskription in der
Makrogenesespalte kommt man zu allen Aussagen über die Inskription, mit Links in die XML-Quellen.</li>
<li>Für Inskriptionen, die <strong>nur in der Transkriptspalte</strong> auftauchen, gibt es keine
Aussagen in den Makrogenesedaten, oder nur mit falschen Referenzen</li>
<li><strong>(eingeklammertes)</strong> in der Transkriptspalte deutet auf Fehler in den Makrogenese-Verweisen hin</li>
<li>Die Spalte <strong>Dok.-Aussagen</strong> enthält die Anzahl der Makrogenese-Aussagen über das
<em>Dokument</em> direkt statt über die Inskriptionen. Hier müsste das Verhältnis zwischen
Dokument und Inskriptionen geklärt werden. (In den Graphen sind von den Inskriptionen
kopierte Aussagen mit einer leeren Pfeilspitze dargestellt, direkte Aussagen mit einer schwarzen)
</li>
</ul>
""",
'Inskriptionen')


Expand Down
1 change: 1 addition & 0 deletions src/macrogen/witnesses.py
Expand Up @@ -42,6 +42,7 @@ def _ids(reference: str):
class Document:

def __init__(self, source: Path):
self.source = source
tree: etree._ElementTree = etree.parse(fspath(source))
self.kind = tree.getroot().tag
idno_els: List[etree.ElementBase] = tree.findall('//f:idno', config.namespaces)
Expand Down

0 comments on commit 8f8d0c4

Please sign in to comment.