From f8451aa44bf462c477ed19f4a4380c735b16e05e Mon Sep 17 00:00:00 2001
From: Thorsten Vitt
Date: Thu, 9 May 2019 11:58:32 +0200
Subject: [PATCH] Experimental timeline visualisation

---
 src/macrogen/etc/logging.yaml |   1 +
 src/macrogen/graph.py         |  23 +++-
 src/macrogen/report.py        |  73 ++++++----
 src/macrogen/timeline.html    | 243 ++++++++++++++++++++++++++++++++++
 src/macrogen/uris.py          |   5 +-
 src/macrogen/witnesses.py     |  20 ++-
 6 files changed, 332 insertions(+), 33 deletions(-)
 create mode 100644 src/macrogen/timeline.html

diff --git a/src/macrogen/etc/logging.yaml b/src/macrogen/etc/logging.yaml
index 4705bee..fae37d9 100644
--- a/src/macrogen/etc/logging.yaml
+++ b/src/macrogen/etc/logging.yaml
@@ -14,6 +14,7 @@ filters:
   levels:
     macrogen.graph: INFO
     macrogen.fes: INFO
+    macrogen.witnesses: DEBUG
   file:
     (): macrogen.logging.LevelFilter
     default: INFO
diff --git a/src/macrogen/graph.py b/src/macrogen/graph.py
index d773c7e..a279df6 100644
--- a/src/macrogen/graph.py
+++ b/src/macrogen/graph.py
@@ -1,6 +1,7 @@
 """
 Functions to build the graphs and perform their analyses.
 """
+import json
 import pickle
 import re
 from collections import defaultdict, Counter
@@ -76,6 +77,8 @@ def __init__(self, load_from: Optional[Path] = None):
         self.closure: nx.MultiDiGraph = None
         self.conflicts: List[MultiEdge] = []
         self.simple_cycles: Set[Sequence[Tuple[Node, Node]]] = set()
+        self.order: List[Reference] = None
+        self.index: Dict[Reference, int] = None

         if load_from:
             self._load_from(load_from)
@@ -214,8 +217,8 @@ def run_analysis(self):
         self._augment_details()

     def order_refs(self):
-        if hasattr(self, '_order'):
-            return self._order
+        if self.order:
+            return self.order

         logger.info('Creating sort order from DAG')

@@ -229,12 +232,17 @@ def secondary_key(node):
         nodes = nx.lexicographical_topological_sort(self.dag, key=secondary_key)
         refs = [node for node in nodes if isinstance(node, Reference)]
-        self._order = refs
-        for index, ref in enumerate(refs):
+        self.order = refs
+        self._build_index()
+        for ref, index in self.index.items():
             if ref in self.base.node:
                 self.base.node[ref]['index'] = index
+                ref.index = index
         return refs

+    def _build_index(self):
+        self.index = {ref: i for (i, ref) in enumerate(self.order, start=1)}
+
     def _augment_details(self):
         logger.info('Augmenting refs with data from graphs')
         for index, ref in enumerate(self.order_refs(), start=1):
@@ -277,6 +285,9 @@ def save(self, outfile: Path):
                 nx.write_gpickle(self.base, base_entry)
             with zip.open('simple_cycles.pickle', 'w') as sc_entry:
                 pickle.dump(self.simple_cycles, sc_entry)
+            with zip.open('order.json', 'w') as order_entry:
+                text = TextIOWrapper(order_entry, encoding='utf-8')
+                json.dump([ref.uri for ref in self.order], text)
             with zip.open('config.yaml', 'w') as config_entry:
                 config.save_config(config_entry)
             with zip.open('base.yaml', 'w') as base_entry:
@@ -291,6 +302,10 @@ def _load_from(self, load_from: Path):
                 self.base = nx.read_gpickle(base_entry)
             with zip.open('simple_cycles.pickle', 'r') as sc_entry:
                 self.simple_cycles = pickle.load(sc_entry)
+            with zip.open('order.json', 'r') as order_entry:
+                uris = json.load(order_entry)
+                self.order = [Witness.get(uri) for uri in uris]
+                self._build_index()

         # Now reconstruct the other data:
         self.working: nx.MultiDiGraph = self.base.copy()
diff --git a/src/macrogen/report.py b/src/macrogen/report.py
index 002e59b..41b68cf 100644
--- a/src/macrogen/report.py
+++ b/src/macrogen/report.py
@@ -13,6 +13,7 @@
 import networkx as nx
 import pandas as pd
+import pkg_resources
 import requests
 from lxml import etree
 from lxml.builder import ElementMaker
@@ -222,7 +223,8 @@ def write_html(filename: Path, content: str, head: str = None, breadcrumbs: List
     if head is not None:
         breadcrumbs = breadcrumbs + [dict(caption=head)]
     breadcrumbs = [dict(caption='Makrogenese-Lab', link='/macrogenesis')] + breadcrumbs
-    prefix = """
+    prefix = f"""
+
     """
     require = "requirejs(['faust_common', 'svg-pan-zoom'], function(Faust, svgPanZoom)"
     if graph_id is not None:
@@ -361,7 +363,7 @@ class RefTable(HtmlTable):
     Builds a table of references.
     """

-    def __init__(self, base: nx.MultiDiGraph, **table_attrs):
+    def __init__(self, graphs: MacrogenesisInfo, **table_attrs):
         super().__init__(data_sortable="true", **table_attrs)
         (self.column('Nr.', data_sortable="numericplus")
          .column('Knoten davor', data_sortable="numericplus")
@@ -372,7 +374,8 @@ def __init__(self, base: nx.MultiDiGraph, **table_attrs):
          .column('erster Vers', data_sortable="numericplus")
          .column('Aussagen', data_sortable="numericplus")
          .column('Konflikte', data_sortable="numericplus"))
-        self.base = base
+        self.graphs = graphs
+        self.base = graphs.base

     def reference(self, ref: Reference, index: Optional[int] = None, write_subpage: bool = False):
         """
@@ -385,7 +388,7 @@ def reference(self, ref: Reference, index: Optional[int] = None, write_subpage:
         """
         if ref in self.base:
             if index is None:
-                index = ref.rank  # self.base.node[ref]['index']
+                index = self.graphs.index.get(ref, -1)
             assertions = list(chain(self.base.in_edges(ref, data=True), self.base.out_edges(ref, data=True)))
             conflicts = [assertion for assertion in assertions if 'delete' in assertion[2] and assertion[2]['delete']]
             self.row((f'{index}', ref.rank, ref, ref, ref.earliest, ref.latest,
@@ -632,7 +635,7 @@ def report_refs(graphs: MacrogenesisInfo):
     nx.write_gpickle(graphs.base, str(target / 'base.gpickle'))

     refs = graphs.order_refs()
-    overview = RefTable(graphs.base)
+    overview = RefTable(graphs)

     for index, ref in enumerate(refs, start=1):
         overview.reference(ref, index, write_subpage=True)
@@ -1027,13 +1030,13 @@ def report_scenes(graphs: MacrogenesisInfo):
                   .column('Gesamt'))
     for scene in SceneInfo.get().scenes:
         items = WitInscrInfo.get().by_scene[scene]
-        witnessTable = RefTable(graphs.base)
+        witnessTable = RefTable(graphs)
         scene_docs = [doc for doc in items if isinstance(doc, DocumentCoverage)]
         scene_inscr = [inscr for inscr in items if isinstance(inscr, InscriptionCoverage)]
         scene_refs = scene_docs + scene_inscr
         scene_wits = {graphs.node(doc.uri, default=None) for doc in scene_refs} - {None}
         scene_graph = graphs.subgraph(*scene_wits, context=False, abs_dates=True)
-        for wit in sorted(scene_wits, key=attrgetter('rank')):
+        for wit in sorted(scene_wits, key=lambda ref: graphs.index.get(ref, 0)):
             witnessTable.reference(wit)
         basename = 'scene_' + scene.n.replace('.', '-')
         subgraph_page = Path(basename + '-subgraph.php')
@@ -1057,6 +1060,26 @@ def report_scenes(graphs: MacrogenesisInfo):
     write_html(target / "scenes.php", sceneTable.format_table(), head='nach Szene')


+def report_unused(graphs: MacrogenesisInfo):
+    unused_nodes = set(node for node in graphs.base.node if isinstance(node, Reference)) - set(graphs.dag.node)
+    not_in_dag_table = RefTable(graphs)
+    for node in unused_nodes:
+        not_in_dag_table.reference(node)
+
+    unindexed = [node for node in graphs.base.node if isinstance(node, Reference) and node not in graphs.index]
+    unindexed_table = RefTable(graphs)
+    for node in unindexed:
+        unindexed_table.reference(node)
+
+    write_html(config.path.report_dir / 'unused.php',
+               f"""
+               {len(unused_nodes)} Zeugen existieren im Ausgangsgraphen, aber nicht im DAG:
+               {not_in_dag_table.format_table()}
+               {len(unindexed)} Knoten haben auf unerklärliche Weise keinen Index:
+               {unindexed_table.format_table()}
+               """,
+               "Nicht eingeordnete Zeugen")
+
+
 def write_order_xml(graphs):
     target: Path = config.path.report_dir
@@ -1174,24 +1197,25 @@ def _dating_table():
     return stat, dating_stat, edge_df


-def report_timeline(info: MacrogenesisInfo):
-    refs = info.order_refs()
+def report_timeline(graphs: MacrogenesisInfo):
+    witinfo = WitInscrInfo.get()
+
+    def rel_scenes(ref: Reference) -> List[str]:
+        info = witinfo.get().by_uri.get(ref.uri, None)
+        if info:
+            return sorted([scene.n for scene in info.max_scenes])
+        else:
+            return []
+
+    refs = graphs.order_refs()
     data = [dict(start=ref.earliest.isoformat(), end=ref.latest.isoformat(),
-                 content=_fmt_node(ref))
+                 content=_fmt_node(ref), id=ref.filename.stem, scenes=rel_scenes(ref),
+                 index=graphs.index[ref])
             for ref in refs
             if ref.earliest > EARLIEST and ref.latest < LATEST]
-    (config.path.report_dir / 'timeline.html').write_text(f"""
-    Zeitstrahl
-    Lade Zeitstrahl ...
[… the remaining removed lines of the old inline timeline HTML are not legible in this copy …]
-    """)
+    with (config.path.report_dir / 'timeline.json').open("wt") as data_out:
+        json.dump(data, data_out)
+    (config.path.report_dir / 'timeline.html').write_bytes(pkg_resources.resource_string('macrogen', 'timeline.html'))


 def report_inscriptions(info: MacrogenesisInfo):
@@ -1213,10 +1237,10 @@ def report_inscriptions(info: MacrogenesisInfo):
                                lambda _, __, attr: attr.get('copy') or attr.get('kind') in ['inscription', 'orphan'])
     table = (HtmlTable()
-             .column('Dokument', lambda uri: _fmt_node(Witness.get(uri)))
+             .column('Dokument', lambda uri: _fmt_node(Witness.get(uri)), data_sortable_type='sigil')
              .column('Inskriptionen Makrogenese')
              .column('Inskriptionen Transkript')
-             .column('Dok.-Aussagen')
+             .column('Dok.-Aussagen', data_sortable_type='numericplus')
              .column('Graph'))

     def uri_idx(uri):
@@ -1303,4 +1327,5 @@ def ghlink(path: Path):
 def generate_reports(info: MacrogenesisInfo):
     report_functions = [fn for name, fn in globals().items() if name.startswith('report_')]
     for report in report_functions:
+        logger.info('Running %s', report.__name__)
         report(info)
diff --git a/src/macrogen/timeline.html b/src/macrogen/timeline.html
new file mode 100644
index 0000000..cd65be0
--- /dev/null
+++ b/src/macrogen/timeline.html
@@ -0,0 +1,243 @@
+Faustedition – Zeitstrahl
+Zeitstrahl wird geladen …
[… the remaining added lines of timeline.html (page markup, styles and scripts; 243 added lines in total) are not legible in this copy …]
diff --git a/src/macrogen/uris.py b/src/macrogen/uris.py
index 50beb4e..f6bfd74 100644
--- a/src/macrogen/uris.py
+++ b/src/macrogen/uris.py
@@ -164,7 +164,7 @@ class UnknownRef(Reference):
     """

     def __init__(self, uri):
-        self.uri = uri
+        super().__init__(uri)
         self.status = "unknown"

@@ -174,7 +174,7 @@ class AmbiguousRef(Reference):
     """

     def __init__(self, uri, wits):
-        self.uri = uri
+        super().__init__(uri)
         self.witnesses = frozenset(wits)
         self.status = 'ambiguous: ' + ", ".join(str(wit) for wit in sorted(self.witnesses))

@@ -207,6 +207,7 @@ class Witness(Reference):

     def __init__(self, doc_record):
         if isinstance(doc_record, dict):
+            super().__init__(doc_record.get('uri', '?'))
             self.__dict__.update(doc_record)
             self.status = '(ok)'
         else:
diff --git a/src/macrogen/witnesses.py b/src/macrogen/witnesses.py
index e7e5505..27df3d5 100644
--- a/src/macrogen/witnesses.py
+++ b/src/macrogen/witnesses.py
@@ -2,7 +2,7 @@
 from collections import defaultdict
 from itertools import chain
 from pathlib import Path
-from typing import List, Optional, Dict, Union
+from typing import List, Optional, Dict, Union, Set
 import reprlib

 from .config import config
@@ -18,6 +18,8 @@
 """

+logger = config.getLogger(__name__)
+

 def encode_sigil(sigil: str) -> str:
     """
@@ -157,6 +159,7 @@ def get(cls) -> 'SceneInfo':
         return cls._instance

     def __init__(self, et=None):
+        logger.debug('Reading scene info ...')
         if et is None:
             et = config.scenes_xml
         self.toplevel = [Scene(el) for el in et.xpath('/*/*')]
@@ -182,7 +185,6 @@ def is_relevant_for(self, first: int, last: int):
                    first <= interval['start'] and interval['end'] <= last
                    for interval in self.intervals)

-
     def _init_relevant_scenes(self):
         relevant_scenes = set()
         for scene in SceneInfo.get().scenes:
@@ -191,6 +193,16 @@ def _init_relevant_scenes(self):
             elif self.is_relevant_for(scene.first, scene.last):
                 relevant_scenes.add(scene)
         self.relevant_scenes = frozenset(relevant_scenes)
+        self.max_scenes = self._reduce_scenes(relevant_scenes)
+
+    @staticmethod
+    def _reduce_scenes(scenes: Set[Scene]) -> Set[Scene]:
+        result = set(scenes)
+        while len(result) > 1 and any(scene.parent for scene in result):
+            result = {scene.parent if scene.parent else scene for scene in result}
+        return result
+
+


 class InscriptionCoverage(IntervalsMixin):
@@ -222,11 +234,12 @@ def __init__(self, json):

 class WitInscrInfo:
     def __init__(self):
+        logger.debug('Loading document and witness coverage from bargraph ...')
         bargraph = config.genetic_bar_graph
         self.documents = [DocumentCoverage(doc) for doc in bargraph]
         self.by_scene: Dict[Scene, Union[InscriptionCoverage, DocumentCoverage]] = defaultdict(list)
         self.by_uri: Dict[str, Union[InscriptionCoverage, DocumentCoverage]] = dict()
-        for doc in self.documents:
+        for doc in config.progress(self.documents, desc='Analyzing documents', unit=' docs'):
             self.by_uri[doc.uri] = doc
             for inscription in doc.inscriptions:
                 self.by_uri[inscription.uri] = inscription
@@ -256,6 +269,7 @@ def resolve(self, arg: str, inscription: Optional[str]=None):


 def all_documents(path: Optional[Path] = None):
+    logger.debug('Reading inscription info from sources ...')
     if path is None:
         path = config.path.data.joinpath('document')
     return [Document(doc) for doc in config.progress(list(path.rglob('**/*.xml')))]
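
Since the script section of the new timeline.html is not legible in this copy of the patch, the snippet below is only a rough, hypothetical sketch of how such a page could consume the timeline.json that report_timeline() now writes. The use of the vis-timeline library, the element id "visualization" and all option values are assumptions for illustration, not taken from the patch; the sketch presumes vis.js has already been loaded by the page.

    <div id="visualization"></div>
    <script>
      // Hypothetical loader (not the patch's actual script): fetch the records
      // written by report_timeline() and hand them to a vis-timeline instance.
      // Each record carries start, end, content, id, scenes and index, exactly
      // as built in report.py above.
      fetch('timeline.json')
          .then(response => response.json())
          .then(records => {
            const items = new vis.DataSet(records);
            const container = document.getElementById('visualization');
            new vis.Timeline(container, items, {
              stack: true,          // keep overlapping date ranges in separate rows
              zoomKey: 'ctrlKey'    // zoom only while Ctrl is held
            });
          })
          .catch(error => console.error('Could not load timeline.json', error));
    </script>

Because start and end already come from Reference.earliest/latest as ISO dates, no further conversion should be needed on the client; the scenes and index fields are simply carried along for tooltips or sorting.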